By this point in the analysis I have filtered the data using the Seurat Pipeline, and have identified cell types using the CHETAH pipeline.

Contents:

Setup

library( here)
library( Seurat)
library( dplyr)
library( fs)
library(CHETAH)
## Warning: Package 'CHETAH' is deprecated and will be removed from Bioconductor
##   version 3.16
library(gplots)
library(reactome.db)
library(ReactomePA)
library(org.Hs.eg.db)
library(dendextend)
library(genefu)
library(limma)
library(edgeR)
library(affy)
library(hgu133plus2.db)
library(WGCNA)
library(goseq)

setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")

source('heatmap.3_function.R')
# here::i_am("/brca-scrnaseq")
set_here("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
## File .here already exists in /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq
here::dr_here()
## here() starts at /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq.
## - This directory contains a file ".here"
## - Initial working directory: /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq
## - Current working directory: /Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq
here()
## [1] "/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq"
setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
load(file = "Organised to CHETAH.R", verbose = T)
## Loading objects:
##   counts1
##   biokey1
##   brcaDat1
##   counts
##   metadata
##   patients
##   cells
##   top10
##   all.genes
##   input
##   input_brcaDat1

In the above sections of code I just loaded the libraries necessary for my analysis and loaded in the post-filtering scRNAseq data.

Matching CHETAH to Bassez

chetah3 =  brcaDat1@meta.data[["CHETAH"]]

chetah3[brcaDat1@meta.data[["CHETAH"]] == "Node1"] = brcaDat1@meta.data[["cellType"]][brcaDat1@meta.data[["CHETAH"]]=="Node1"]
table(chetah3, brcaDat1@meta.data[["cellType"]])
##                
## chetah3         B_cell Cancer_cell Endothelial_cell Fibroblast Mast_cell
##   B_cell          2952           0                0          0         0
##   CAF                0           0                0       8780         0
##   Cancer_cell        0          14                0          0         0
##   CD4 T cell         2           0                0          0         0
##   CD8 T cell         2           0                0          0         0
##   Dendritic          0           0                0          0         0
##   Endothelial        0           0              725          0         0
##   Macrophage         0           0                0          0         0
##   Mast               0           0                0          0         1
##   Mast_cell          0           0                0          0         2
##   Myeloid_cell       0           0                0          0         0
##   Myofibroblast      0           0                0        174         0
##   NK                 0           0                0          0         0
##   Node10             0           1                0       1072         0
##   Node2             57           3                0          0         0
##   Node3             13           3                0          0         0
##   Node4             19           1                0          0         0
##   Node5              7           5                0          0         0
##   Node6              2           0                0          0         0
##   Node7              0           0                0          0         1
##   Node8              0           0                0          0         0
##   Node9              0           2               94       1790         0
##   pDC                0           0                0          0         0
##   Plasma           235           0                0          0         0
##   reg. T cell        0           0                0          0         0
##   T_cell             0           0                0          0         0
##   Unassigned      6759       30043             3477       6155       717
##                
## chetah3         Myeloid_cell   pDC T_cell
##   B_cell                   0     0      0
##   CAF                      0     0      0
##   Cancer_cell              0     0      0
##   CD4 T cell               0     0    892
##   CD8 T cell               0     0    815
##   Dendritic               15     0      0
##   Endothelial              0     0      0
##   Macrophage             113     0      0
##   Mast                     0     0      0
##   Mast_cell                0     0      0
##   Myeloid_cell            68     0      0
##   Myofibroblast            0     0      0
##   NK                       0     0    158
##   Node10                   0     0      0
##   Node2                    0     0   1775
##   Node3                    2     0   5507
##   Node4                    0     0    694
##   Node5                    0     0   3522
##   Node6                    0     0    614
##   Node7                   13     0      0
##   Node8                   37     0      0
##   Node9                    0     0      0
##   pDC                      0    50      0
##   Plasma                   0     0      0
##   reg. T cell              0     0    513
##   T_cell                   0     0   6440
##   Unassigned           10809   655  27193
replace = brcaDat1@meta.data[["CHETAH"]]=="Node1"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node2"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node3"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node4"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node5"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node6"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node8"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node9"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Node10"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

replace = brcaDat1@meta.data[["CHETAH"]]=="Unassigned"
chetah3[ replace ] = brcaDat1@meta.data[["cellType"]][ replace ]

table(chetah3, brcaDat1@meta.data[["cellType"]])
##                   
## chetah3            B_cell Cancer_cell Endothelial_cell Fibroblast Mast_cell
##   B_cell             9809           0                0          0         0
##   CAF                   0           0                0       8780         0
##   Cancer_cell           0       30072                0          0         0
##   CD4 T cell            2           0                0          0         0
##   CD8 T cell            2           0                0          0         0
##   Dendritic             0           0                0          0         0
##   Endothelial           0           0              725          0         0
##   Endothelial_cell      0           0             3571          0         0
##   Fibroblast            0           0                0       9017         0
##   Macrophage            0           0                0          0         0
##   Mast                  0           0                0          0         1
##   Mast_cell             0           0                0          0       719
##   Myeloid_cell          0           0                0          0         0
##   Myofibroblast         0           0                0        174         0
##   NK                    0           0                0          0         0
##   Node7                 0           0                0          0         1
##   pDC                   0           0                0          0         0
##   Plasma              235           0                0          0         0
##   reg. T cell           0           0                0          0         0
##   T_cell                0           0                0          0         0
##                   
## chetah3            Myeloid_cell   pDC T_cell
##   B_cell                      0     0      0
##   CAF                         0     0      0
##   Cancer_cell                 0     0      0
##   CD4 T cell                  0     0    892
##   CD8 T cell                  0     0    815
##   Dendritic                  15     0      0
##   Endothelial                 0     0      0
##   Endothelial_cell            0     0      0
##   Fibroblast                  0     0      0
##   Macrophage                113     0      0
##   Mast                        0     0      0
##   Mast_cell                   0     0      0
##   Myeloid_cell            10916     0      0
##   Myofibroblast               0     0      0
##   NK                          0     0    158
##   Node7                      13     0      0
##   pDC                         0   705      0
##   Plasma                      0     0      0
##   reg. T cell                 0     0    513
##   T_cell                      0     0  45745

Here I matched the Bassez et al identified cell-types with the CHETAH-identified cell types. This is because the cell types identified by Bassez et al were not specific,rather in categories, so they do not necessarily align with those of CHETAH identification. However, CHETAH identification could not identify a large proportion of cells, so all of these uncategorised cells can now at least be associated with a category of cell type.

Aggregating Counts

brcaDat1@meta.data$patient_trt = paste0(brcaDat1@meta.data$patient_id, "_", brcaDat1@meta.data$timepoint) 
aggrCounts = AggregateExpression(brcaDat1, set = brcaDat1@meta.data$Cell, group.by = "patient_trt")
## The following functions and any applicable methods accept the dots: CreateSeuratObject
counts = aggrCounts[[1]]
dim(counts)
## [1] 25288    62
head(counts)
##          BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG      1900.38751    2420.4932    2037.5990     2090.1745   1381.90946
## A1BG-AS1   314.69457     424.6407     470.1492      498.3933    219.37918
## A2M      23268.83314    2686.2540    2666.0859     4816.8543    832.72719
## A2M-AS1     64.37906     200.7731     173.9043      115.5187     48.49831
## A4GALT    1510.14001     200.9704     145.5802      286.4946     34.93897
## AAAS       841.60844    1103.2443    1093.1193     1348.4628    479.36815
##          BIOKEY_11_Pre BIOKEY_12_On BIOKEY_12_Pre BIOKEY_13_On BIOKEY_13_Pre
## A1BG         559.64175    1855.8546     3231.5822    326.62392     1715.9383
## A1BG-AS1      46.86798     228.2988      205.0674     30.22590      343.3174
## A2M          390.02989    1945.9009     5295.7009   2985.44388     7077.9538
## A2M-AS1       24.69787     145.4389      121.9377     17.88692      125.7846
## A4GALT        10.93204     161.8869      456.1433    175.20030      630.6713
## AAAS         312.24811     679.2446      874.5552    212.84814      821.0722
##          BIOKEY_14_On BIOKEY_14_Pre BIOKEY_15_On BIOKEY_15_Pre BIOKEY_16_On
## A1BG        356.43841    1254.12969    1974.9635    1154.70181    1842.4072
## A1BG-AS1     24.04932     147.91064     337.1768     244.01799     272.2644
## A2M        2924.69116    3529.77582    2488.7363    3454.74353    6626.4567
## A2M-AS1      23.41485      63.36445     115.2525      39.16571     157.6789
## A4GALT      136.38584     362.48009     237.5187     260.08511     460.8066
## AAAS        131.35655     651.31124     865.1258     477.67563     720.9153
##          BIOKEY_16_Pre BIOKEY_17_On BIOKEY_17_Pre BIOKEY_18_On BIOKEY_18_Pre
## A1BG         1801.9798    1689.3551     1247.2119    991.87062     460.38283
## A1BG-AS1      351.0143     182.0227      111.8949    146.67247      57.42074
## A2M          3195.9099    3507.5651     1904.2612   6879.32330    5202.23437
## A2M-AS1       157.2182      40.8649       54.8405     29.86681      25.79275
## A4GALT        123.1863     806.6421      251.6255    700.19699     196.93697
## AAAS          676.7908     336.4861      310.9628    605.52253     219.99570
##          BIOKEY_19_On BIOKEY_19_Pre BIOKEY_2_On BIOKEY_2_Pre BIOKEY_20_On
## A1BG        863.48156     1542.6933  1976.77315   1558.08227    792.17530
## A1BG-AS1     84.36332      309.2133   162.32359    169.94594    122.26586
## A2M        3607.52909    11753.8181  7373.98091   7569.68694   6064.83994
## A2M-AS1      43.26117      131.6930    43.49986     29.03899     15.74553
## A4GALT      212.73272      441.0435   269.62462    105.57977    334.47803
## AAAS        312.03455      723.3303   534.54011    413.87989    494.24070
##          BIOKEY_20_Pre BIOKEY_21_On BIOKEY_21_Pre BIOKEY_22_On BIOKEY_22_Pre
## A1BG         64.378561   1489.29558     878.24589    1398.0354     191.79383
## A1BG-AS1     12.292394     88.67642      98.07367     147.4568      17.03978
## A2M        1491.594254   2693.08371    2556.03826    2675.3610    1233.02681
## A2M-AS1       3.000855     18.21744      18.44090      53.9638      20.77072
## A4GALT      110.549904   1475.06842     332.45256     982.2869     111.21624
## AAAS         51.997246    643.38051     270.48765     676.9780     163.44320
##          BIOKEY_23_On BIOKEY_23_Pre BIOKEY_24_On BIOKEY_24_Pre BIOKEY_25_On
## A1BG        2208.5338      76.48401    712.50143     653.02749    213.27953
## A1BG-AS1     292.7142      31.29229     73.92808      45.52037     33.37356
## A2M         6464.4014     533.28570   3933.47287    2117.44697    352.39940
## A2M-AS1      118.7594      10.26405     67.45770      18.72431      0.00000
## A4GALT       497.2741      35.67510    461.98096     169.50604     44.75861
## AAAS         546.7235      20.59312    269.69741     213.80754     82.39351
##          BIOKEY_25_Pre BIOKEY_26_On BIOKEY_26_Pre BIOKEY_27_On BIOKEY_27_Pre
## A1BG        101.638815   5229.66247     857.58915    751.93290     1546.5327
## A1BG-AS1      6.409162    211.84285      52.28328    110.01495      226.2388
## A2M         648.846994  18311.18892    4039.69997   3863.13499     7730.4527
## A2M-AS1       0.000000     44.73858      33.09237     60.34836      125.1434
## A4GALT       48.911380    738.38370      87.17145    454.23169      638.9537
## AAAS         58.852298    505.28363     147.76195    258.14705      636.9252
##          BIOKEY_28_On BIOKEY_28_Pre BIOKEY_29_On BIOKEY_29_Pre BIOKEY_3_On
## A1BG       1085.89258     372.01264    60.421630     652.40703  1702.58326
## A1BG-AS1     77.89455      92.53131    11.663869      44.64777   139.84319
## A2M        2271.96717    3227.92061   522.678685    1831.08341  2802.43695
## A2M-AS1      21.21864      16.34732     5.359943      14.15381    45.69168
## A4GALT       13.69600     153.57510    48.620618     115.80593   401.35488
## AAAS        232.72662     189.72427    35.980999     176.29975   358.86291
##          BIOKEY_3_Pre BIOKEY_30_On BIOKEY_30_Pre BIOKEY_31_On BIOKEY_31_Pre
## A1BG        1870.5906   1167.76271    2039.68894   1983.15751    1186.54770
## A1BG-AS1     225.2130    105.42174     170.07499    199.62419      68.97513
## A2M         3040.0649   2919.04175    3159.96041   4374.99996    1165.70749
## A2M-AS1       37.2509     27.48489      22.51573     67.24141      12.15292
## A4GALT       290.6268    102.85704      96.79814    464.53299     426.35554
## AAAS         441.3157    406.16096     504.91820    485.53125     202.89897
##          BIOKEY_4_On BIOKEY_4_Pre BIOKEY_5_On BIOKEY_5_Pre BIOKEY_6_On
## A1BG      994.603168   2179.06098   137.75410   1031.56107   839.92827
## A1BG-AS1  102.681087    293.93446    18.66668    164.84794   105.37274
## A2M       566.885365   1131.71116   343.50344    563.94215  5049.67645
## A2M-AS1    52.216702     72.76798    14.36486    135.32170    28.95516
## A4GALT      7.668021     87.94784    33.58809     34.20967   510.43514
## AAAS      504.467631    884.23504    29.61075    374.69285   482.93330
##          BIOKEY_6_Pre BIOKEY_7_On BIOKEY_7_Pre BIOKEY_8_On BIOKEY_8_Pre
## A1BG        942.92485  1477.88713    209.93130   559.91279    138.94097
## A1BG-AS1    108.88995   113.41705     17.15663    74.87933     33.23953
## A2M        3731.40542  1403.38859    179.28400   229.90130    287.42814
## A2M-AS1      23.07205    44.48068      0.00000    45.09714      0.00000
## A4GALT      267.64798   388.33220     29.14525    11.44350     16.51448
## AAAS        553.78968   288.95222     55.61864   208.89668     63.90025
##          BIOKEY_9_On BIOKEY_9_Pre
## A1BG      360.206896    968.90932
## A1BG-AS1   36.730616    100.41154
## A2M      1436.647896   7330.89666
## A2M-AS1     7.297468     29.82598
## A4GALT     78.491086    456.12146
## AAAS      106.699061    412.62730
dim(aggrCounts[[1]])
## [1] 25288    62
par(mar=c(11,5,2,0))
colSums(aggrCounts[[1]]) %>% sort() %>% barplot(., las=3, ylab = "Number of reads", cex.lab=1.2, cex.axis=1.2, cex=1)
mtext('Tumour sample', 1, 9, cex = 1.2)
# abline(h=3e6, col='blue', lty=3)
abline(h=3e6, col='blue', lty=3, lwd=1.3)
abline(h=5e6, col='blue', lty=3, lwd=1.3)

# Here I aggregated the count data across all cells for each sample, so that every gene has one representative expression level in each tumour. This makes the data mimic bulk RNA sequencing moreso than scRNAseq.
par(mar=c(11,5,2,0))
colSums(counts) %>% sort() %>% barplot(., las=3, ylab = "Number of reads", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext('Tumour sample', 1, 9, cex = 1.2)
# abline(h=3e6, col='blue', lty=3)
abline(h=2e6, col='blue', lty=3, lwd=1.3)

# In the grph above, I plotted the total number of counts for each sample. The two horizontal lines represent cut-off thresholds of 3 million counts and 1 milion counts. Samples with low total counts may not be accurate and so must be disregarded. Howeve, disregarding all tumours with a total count of under 3 million removes almost a third of the data (19 samples as shown below), so instead I will remove any samples with less than 2 million counts.
(colSums(counts) < 3000000) %>%  sum()
## [1] 5
colnames(counts)[colSums(counts) < 3000000] %>%  t() %>%  t()
##                        [,1]           
## data[, 1]BIOKEY_20_Pre "BIOKEY_20_Pre"
## data[, 1]BIOKEY_23_Pre "BIOKEY_23_Pre"
## data[, 1]BIOKEY_25_Pre "BIOKEY_25_Pre"
## data[, 1]BIOKEY_29_On  "BIOKEY_29_On" 
## data[, 1]BIOKEY_7_Pre  "BIOKEY_7_Pre"
head(counts)
##          BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG      1900.38751    2420.4932    2037.5990     2090.1745   1381.90946
## A1BG-AS1   314.69457     424.6407     470.1492      498.3933    219.37918
## A2M      23268.83314    2686.2540    2666.0859     4816.8543    832.72719
## A2M-AS1     64.37906     200.7731     173.9043      115.5187     48.49831
## A4GALT    1510.14001     200.9704     145.5802      286.4946     34.93897
## AAAS       841.60844    1103.2443    1093.1193     1348.4628    479.36815
##          BIOKEY_11_Pre BIOKEY_12_On BIOKEY_12_Pre BIOKEY_13_On BIOKEY_13_Pre
## A1BG         559.64175    1855.8546     3231.5822    326.62392     1715.9383
## A1BG-AS1      46.86798     228.2988      205.0674     30.22590      343.3174
## A2M          390.02989    1945.9009     5295.7009   2985.44388     7077.9538
## A2M-AS1       24.69787     145.4389      121.9377     17.88692      125.7846
## A4GALT        10.93204     161.8869      456.1433    175.20030      630.6713
## AAAS         312.24811     679.2446      874.5552    212.84814      821.0722
##          BIOKEY_14_On BIOKEY_14_Pre BIOKEY_15_On BIOKEY_15_Pre BIOKEY_16_On
## A1BG        356.43841    1254.12969    1974.9635    1154.70181    1842.4072
## A1BG-AS1     24.04932     147.91064     337.1768     244.01799     272.2644
## A2M        2924.69116    3529.77582    2488.7363    3454.74353    6626.4567
## A2M-AS1      23.41485      63.36445     115.2525      39.16571     157.6789
## A4GALT      136.38584     362.48009     237.5187     260.08511     460.8066
## AAAS        131.35655     651.31124     865.1258     477.67563     720.9153
##          BIOKEY_16_Pre BIOKEY_17_On BIOKEY_17_Pre BIOKEY_18_On BIOKEY_18_Pre
## A1BG         1801.9798    1689.3551     1247.2119    991.87062     460.38283
## A1BG-AS1      351.0143     182.0227      111.8949    146.67247      57.42074
## A2M          3195.9099    3507.5651     1904.2612   6879.32330    5202.23437
## A2M-AS1       157.2182      40.8649       54.8405     29.86681      25.79275
## A4GALT        123.1863     806.6421      251.6255    700.19699     196.93697
## AAAS          676.7908     336.4861      310.9628    605.52253     219.99570
##          BIOKEY_19_On BIOKEY_19_Pre BIOKEY_2_On BIOKEY_2_Pre BIOKEY_20_On
## A1BG        863.48156     1542.6933  1976.77315   1558.08227    792.17530
## A1BG-AS1     84.36332      309.2133   162.32359    169.94594    122.26586
## A2M        3607.52909    11753.8181  7373.98091   7569.68694   6064.83994
## A2M-AS1      43.26117      131.6930    43.49986     29.03899     15.74553
## A4GALT      212.73272      441.0435   269.62462    105.57977    334.47803
## AAAS        312.03455      723.3303   534.54011    413.87989    494.24070
##          BIOKEY_20_Pre BIOKEY_21_On BIOKEY_21_Pre BIOKEY_22_On BIOKEY_22_Pre
## A1BG         64.378561   1489.29558     878.24589    1398.0354     191.79383
## A1BG-AS1     12.292394     88.67642      98.07367     147.4568      17.03978
## A2M        1491.594254   2693.08371    2556.03826    2675.3610    1233.02681
## A2M-AS1       3.000855     18.21744      18.44090      53.9638      20.77072
## A4GALT      110.549904   1475.06842     332.45256     982.2869     111.21624
## AAAS         51.997246    643.38051     270.48765     676.9780     163.44320
##          BIOKEY_23_On BIOKEY_23_Pre BIOKEY_24_On BIOKEY_24_Pre BIOKEY_25_On
## A1BG        2208.5338      76.48401    712.50143     653.02749    213.27953
## A1BG-AS1     292.7142      31.29229     73.92808      45.52037     33.37356
## A2M         6464.4014     533.28570   3933.47287    2117.44697    352.39940
## A2M-AS1      118.7594      10.26405     67.45770      18.72431      0.00000
## A4GALT       497.2741      35.67510    461.98096     169.50604     44.75861
## AAAS         546.7235      20.59312    269.69741     213.80754     82.39351
##          BIOKEY_25_Pre BIOKEY_26_On BIOKEY_26_Pre BIOKEY_27_On BIOKEY_27_Pre
## A1BG        101.638815   5229.66247     857.58915    751.93290     1546.5327
## A1BG-AS1      6.409162    211.84285      52.28328    110.01495      226.2388
## A2M         648.846994  18311.18892    4039.69997   3863.13499     7730.4527
## A2M-AS1       0.000000     44.73858      33.09237     60.34836      125.1434
## A4GALT       48.911380    738.38370      87.17145    454.23169      638.9537
## AAAS         58.852298    505.28363     147.76195    258.14705      636.9252
##          BIOKEY_28_On BIOKEY_28_Pre BIOKEY_29_On BIOKEY_29_Pre BIOKEY_3_On
## A1BG       1085.89258     372.01264    60.421630     652.40703  1702.58326
## A1BG-AS1     77.89455      92.53131    11.663869      44.64777   139.84319
## A2M        2271.96717    3227.92061   522.678685    1831.08341  2802.43695
## A2M-AS1      21.21864      16.34732     5.359943      14.15381    45.69168
## A4GALT       13.69600     153.57510    48.620618     115.80593   401.35488
## AAAS        232.72662     189.72427    35.980999     176.29975   358.86291
##          BIOKEY_3_Pre BIOKEY_30_On BIOKEY_30_Pre BIOKEY_31_On BIOKEY_31_Pre
## A1BG        1870.5906   1167.76271    2039.68894   1983.15751    1186.54770
## A1BG-AS1     225.2130    105.42174     170.07499    199.62419      68.97513
## A2M         3040.0649   2919.04175    3159.96041   4374.99996    1165.70749
## A2M-AS1       37.2509     27.48489      22.51573     67.24141      12.15292
## A4GALT       290.6268    102.85704      96.79814    464.53299     426.35554
## AAAS         441.3157    406.16096     504.91820    485.53125     202.89897
##          BIOKEY_4_On BIOKEY_4_Pre BIOKEY_5_On BIOKEY_5_Pre BIOKEY_6_On
## A1BG      994.603168   2179.06098   137.75410   1031.56107   839.92827
## A1BG-AS1  102.681087    293.93446    18.66668    164.84794   105.37274
## A2M       566.885365   1131.71116   343.50344    563.94215  5049.67645
## A2M-AS1    52.216702     72.76798    14.36486    135.32170    28.95516
## A4GALT      7.668021     87.94784    33.58809     34.20967   510.43514
## AAAS      504.467631    884.23504    29.61075    374.69285   482.93330
##          BIOKEY_6_Pre BIOKEY_7_On BIOKEY_7_Pre BIOKEY_8_On BIOKEY_8_Pre
## A1BG        942.92485  1477.88713    209.93130   559.91279    138.94097
## A1BG-AS1    108.88995   113.41705     17.15663    74.87933     33.23953
## A2M        3731.40542  1403.38859    179.28400   229.90130    287.42814
## A2M-AS1      23.07205    44.48068      0.00000    45.09714      0.00000
## A4GALT      267.64798   388.33220     29.14525    11.44350     16.51448
## AAAS        553.78968   288.95222     55.61864   208.89668     63.90025
##          BIOKEY_9_On BIOKEY_9_Pre
## A1BG      360.206896    968.90932
## A1BG-AS1   36.730616    100.41154
## A2M      1436.647896   7330.89666
## A2M-AS1     7.297468     29.82598
## A4GALT     78.491086    456.12146
## AAAS      106.699061    412.62730
counts = counts[,-c(25:26,31:32,35:36,43:44,57:58)]
dim(counts)
## [1] 25288    52
# I thus removed these 2 samples and their pre/on treatment counterparts.

#table(brcaDat1@meta.data$cellType)
#table(brcaDat1@meta.data$patient_id, brcaDat1@meta.data$cellType)
#prop.table(table(brcaDat1@meta.data$patient_id, brcaDat1@meta.data$cellType),1) %>% round(.,3)

#boxplot(prop.table(table(brcaDat1@meta.data$CHETAH, brcaDat1@meta.data$patient_id),1) %>% round(.,3))

# boxplot (brcaDat1@meta.data$CHETAH, brcaDat1@meta.data$patient_id)

# hist(log10(brcaDat1@meta.data$nCount_RNA), 100)

dim(brcaDat1)
## [1]  25288 122993
head(brcaDat1)
##                                  orig.ident nCount_RNA nFeature_RNA
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1       brca        684          430
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1       brca       1252          700
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1       brca        522          330
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1       brca       8454         2637
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1       brca       1908          916
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1       brca       5652         2290
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1       brca       1779          860
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1       brca        626          211
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1       brca       5873         2248
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1       brca        742          424
##                                                              Cell patient_id
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 BIOKEY_13_Pre_AAACCTGCAACAACCT-1  BIOKEY_13
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1  BIOKEY_13
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 BIOKEY_13_Pre_AAACCTGGTCTCCACT-1  BIOKEY_13
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 BIOKEY_13_Pre_AAACCTGTCAACGAAA-1  BIOKEY_13
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 BIOKEY_13_Pre_AAACGGGCACAGAGGT-1  BIOKEY_13
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 BIOKEY_13_Pre_AAACGGGCATGGGACA-1  BIOKEY_13
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 BIOKEY_13_Pre_AAACGGGGTTCATGGT-1  BIOKEY_13
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 BIOKEY_13_Pre_AAACGGGTCAACGAAA-1  BIOKEY_13
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 BIOKEY_13_Pre_AAACGGGTCACCAGGC-1  BIOKEY_13
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 BIOKEY_13_Pre_AAACGGGTCCGAACGC-1  BIOKEY_13
##                                  timepoint expansion BC_type         cellType
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1       Pre       n/a   HER2+     Myeloid_cell
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1       Pre       n/a   HER2+           T_cell
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1       Pre       n/a   HER2+              pDC
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1       Pre       n/a   HER2+     Myeloid_cell
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1       Pre       n/a   HER2+     Myeloid_cell
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1       Pre       n/a   HER2+       Fibroblast
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1       Pre       n/a   HER2+ Endothelial_cell
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1       Pre       n/a   HER2+           B_cell
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1       Pre       n/a   HER2+       Fibroblast
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1       Pre       n/a   HER2+      Cancer_cell
##                                           cohort percent.mt RNA_snn_res.0.5
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1 treatment_naive  2.4853801               9
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1 treatment_naive  4.0734824               0
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1 treatment_naive  0.7662835               7
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1 treatment_naive  4.2228531               9
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1 treatment_naive  5.9224319               4
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1 treatment_naive  3.8747346               3
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1 treatment_naive  7.0264193              18
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1 treatment_naive  0.1597444              12
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1 treatment_naive  5.3635280               3
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1 treatment_naive  2.4258760               0
##                                  seurat_clusters     CHETAH   patient_trt
## BIOKEY_13_Pre_AAACCTGCAACAACCT-1               9 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACCTGCAAGAAGAG-1               0 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACCTGGTCTCCACT-1               7 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACCTGTCAACGAAA-1               9 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGCACAGAGGT-1               4 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGCATGGGACA-1               3        CAF BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGGTTCATGGT-1              18 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGTCAACGAAA-1              12 Unassigned BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGTCACCAGGC-1               3        CAF BIOKEY_13_Pre
## BIOKEY_13_Pre_AAACGGGTCCGAACGC-1               0 Unassigned BIOKEY_13_Pre
# table(brcaDat1@meta.data$CHETAH, brcaDat1@meta.data$cellType)

# brcaDat1 = brcaDat1[-c(brcaDat1$patient_id=="BIOKEY_23_PRE", brcaDat1$patient_id=="BIOKEY_23_ON", brcaDat1$patient_id=="BIOKEY_29_PRE", brcaDat1$patient_id=="BIOKEY_29_ON"),]
# brcaDat1 = brcaDat1[,-c(brcaDat1$patient_id=="BIOKEY_23", brcaDat1$patient_id=="BIOKEY_29")]
# dim(brcaDat1)

# table(brcaDat1$patient_id)



# nCount_RNA = total number of molecules detected within a cell. This data shows a two-peak distribution of data.

# hist(brcaDat1@meta.data$nCount_RNA/brcaDat1@meta.data$nFeature_RNA, 100)

# boxplot((brcaDat1@meta.data$nCount_RNA/brcaDat1@meta.data$nFeature_RNA)~brcaDat1@meta.data$patient_id)

I cut off the bottom 6 tumours, because they have a low read information. These tumours are: BIOKEY_20_Pre, BIOKEY_23_Pre, BIOKEY_25_Pre, BIOKEY_29_On, BIOKEY_5_On, and BIOKEY_7_Pre. I assume that I should also remove their respective counterparts (pre- and on- data relating to that same tumour).

Voom normalisation

dge = DGEList( counts = counts)
dge = calcNormFactors(dge)

logcpm = edgeR::cpm(dge, log = TRUE, prior.counts = 3)
design = model.matrix(~rep(1, ncol(counts)) - 1)
v = voom(dge, design = design, plot = TRUE)

expDat = v$E
expDat[1:5,1:5]
##          BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG       5.6562549     5.572602     5.389627      5.386081    5.9465599
## A1BG-AS1   3.0638964     3.063015     3.275127      3.318916    3.2941579
## A2M        9.2699397     5.722867     5.777397      6.590354    5.2161568
## A2M-AS1    0.7834785     1.984229     1.842910      1.214542    1.1282508
## A4GALT     5.3247417     1.985643     1.587235      2.521208    0.6608555
head(expDat)
##          BIOKEY_1_On BIOKEY_1_Pre BIOKEY_10_On BIOKEY_10_Pre BIOKEY_11_On
## A1BG       5.6562549     5.572602     5.389627      5.386081    5.9465599
## A1BG-AS1   3.0638964     3.063015     3.275127      3.318916    3.2941579
## A2M        9.2699397     5.722867     5.777397      6.590354    5.2161568
## A2M-AS1    0.7834785     1.984229     1.842910      1.214542    1.1282508
## A4GALT     5.3247417     1.985643     1.587235      2.521208    0.6608555
## AAAS       4.4816597     4.439409     4.491514      4.753963    4.4200849
##          BIOKEY_11_Pre BIOKEY_12_On BIOKEY_12_Pre BIOKEY_13_On BIOKEY_13_Pre
## A1BG         5.4943341     5.876692      6.558254     5.372207      5.767321
## A1BG-AS1     1.9305263     2.856371      2.583474     1.959897      3.447617
## A2M          4.9739751     5.945029      7.270748     8.562485      7.811337
## A2M-AS1      1.0199158     2.207656      1.835910     1.219121      2.002654
## A4GALT      -0.1203027     2.361727      3.734930     4.475486      4.324006
## AAAS         4.6535433     4.427284      4.673236     4.755579      4.704362
##          BIOKEY_14_On BIOKEY_14_Pre BIOKEY_15_On BIOKEY_15_Pre BIOKEY_16_On
## A1BG         5.491218      5.644349     5.659285     5.3438729     5.169531
## A1BG-AS1     1.629298      2.564754     3.110809     3.1037405     2.413275
## A2M          8.526002      7.136868     5.992797     6.9245154     7.015895
## A2M-AS1      1.591521      1.348244     1.566209     0.4797642     1.627177
## A4GALT       4.108516      3.855050     2.606241     3.1955546     3.171346
## AAAS         4.054512      4.699614     4.468909     4.0713405     3.816450
##          BIOKEY_16_Pre BIOKEY_17_On BIOKEY_17_Pre BIOKEY_18_On BIOKEY_18_Pre
## A1BG          5.429984     6.398167      6.540137    5.2725695      5.588972
## A1BG-AS1      3.071657     3.187415      3.067500    2.5191983      2.596724
## A2M           6.256454     7.451943      7.150463    8.0659901      9.085770
## A2M-AS1       1.915423     1.045818      2.045332    0.2422577      1.457309
## A4GALT        1.564759     5.332162      4.233066    4.7704812      4.365964
## AAAS          4.017848     4.072028      4.537984    4.5610619      4.525323
##          BIOKEY_19_On BIOKEY_19_Pre BIOKEY_2_On BIOKEY_2_Pre BIOKEY_21_On
## A1BG         5.872312      5.373995   6.3601808    6.3617116    6.1896793
## A1BG-AS1     2.524524      3.057081   2.7580502    3.1688635    2.1273710
## A2M          7.934450      8.303195   8.2592101    8.6418063    7.0440918
## A2M-AS1      1.569034      1.828793   0.8703114    0.6402419   -0.1249084
## A4GALT       3.853740      3.568703   3.4883656    2.4846987    6.1758357
## AAAS         4.405327      4.281799   4.4743875    4.4505032    4.9794298
##          BIOKEY_21_Pre BIOKEY_22_On BIOKEY_22_Pre BIOKEY_24_On BIOKEY_24_Pre
## A1BG         6.2284215     6.119874      5.122621     5.749911     5.9290209
## A1BG-AS1     3.0722496     2.879205      1.668012     2.489925     2.1011174
## A2M          7.7690951     7.055960      7.804026     8.213921     7.6253675
## A2M-AS1      0.6925521     1.437398      1.946249     2.358715     0.8417764
## A4GALT       4.8282920     5.610908      4.339148     5.125400     3.9863590
## AAAS         4.5312025     5.074203      4.892505     4.350020     4.3204556
##          BIOKEY_26_On BIOKEY_26_Pre BIOKEY_27_On BIOKEY_27_Pre BIOKEY_28_On
## A1BG         7.384139      6.281395     5.988802      6.008534    6.4364013
## A1BG-AS1     2.761751      2.258421     3.221481      3.238134    2.6437530
## A2M          9.191977      8.516622     8.349126      8.329677    7.5011152
## A2M-AS1      0.530981      1.606473     2.360529      2.386437    0.7919333
## A4GALT       4.560703      2.990447     5.262255      4.733943    0.1784844
## AAAS         4.013876      3.748416     4.448224      4.729359    4.2166601
##          BIOKEY_28_Pre BIOKEY_3_On BIOKEY_3_Pre BIOKEY_30_On BIOKEY_30_Pre
## A1BG         5.1438604    6.527264    6.3610656    6.2089421     6.6789158
## A1BG-AS1     3.1423593    2.926146    3.3097476    2.7456480     3.0986911
## A2M          8.2593277    7.246054    7.0615256    7.5303193     7.3103478
## A2M-AS1      0.6771622    1.322892    0.7298409    0.8253694     0.2089768
## A4GALT       3.8702047    4.443861    3.6769058    2.7102859     2.2887690
## AAAS         4.1742724    4.282628    4.2787026    4.6864758     4.6657626
##          BIOKEY_31_On BIOKEY_31_Pre BIOKEY_4_On BIOKEY_4_Pre BIOKEY_5_On
## A1BG         6.018702     6.2703980    5.564463     5.927756    5.719734
## A1BG-AS1     2.709506     2.1756605    2.294796     3.039737    2.869084
## A2M          7.159987     6.2448446    4.753946     4.982861    7.034835
## A2M-AS1      1.146721    -0.2813656    1.325949     1.033045    2.502390
## A4GALT       3.925944     4.7948399   -1.364252     1.304689    3.699751
## AAAS         3.989660     3.7254043    4.585808     4.627035    3.520763
##          BIOKEY_5_Pre BIOKEY_6_On BIOKEY_6_Pre BIOKEY_8_On BIOKEY_8_Pre
## A1BG        5.6010827   5.8228518   5.35769515   5.7145084     5.171955
## A1BG-AS1    2.9591313   2.8340583   2.24926771   2.8202593     3.124813
## A2M         4.7304519   8.4099890   7.34162807   4.4321673     6.218010
## A2M-AS1     2.6753402   0.9883196   0.03493685   2.0950336    -2.951555
## A4GALT      0.7070359   5.1048672   3.54281694   0.1623155     2.137136
## AAAS        4.1412585   5.0250439   4.59042770   4.2942570     4.057439
##          BIOKEY_9_On BIOKEY_9_Pre
## A1BG        6.075657    5.3941316
## A1BG-AS1    2.799392    2.1301168
## A2M         8.069967    8.3130418
## A2M-AS1     0.543980    0.3956518
## A4GALT      3.884592    4.3080243
## AAAS        4.325123    4.1636121

Here I put the scRNAseq data through voom normalisation. The “voom” function estimates the relationship between the mean and the variance of the logCPM data, normalises the data, and creates “precision weights” for each observation that are incorporated into the limma analysis. From this voom normalisation, I created an expDat expression object.

Pam50

setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
load("expdatpreandon.RData", verbose = T)
## Loading objects:
##   expDaton
##   expDatpre
head(expDatpre)
##          T_BIOKEY_1_Pre T_BIOKEY_10_Pre T_BIOKEY_11_Pre E_BIOKEY_12_Pre
## A1BG           5.516145        5.431743        5.413533       6.9626246
## A1BG-AS1       2.997086        3.239654        1.691761       1.3996873
## A2M            4.539965        6.134018        4.152077       6.5496177
## A2M-AS1        1.479498        1.143693        1.200933       0.7116221
## A4GALT         1.460347        2.455938       -3.898274       3.1029785
## AAAS           4.371725        4.656356        4.620697       4.8215507
##          H_BIOKEY_13_Pre T_BIOKEY_14_Pre T_BIOKEY_15_Pre T_BIOKEY_16_Pre
## A1BG            5.694850        5.312122       5.1150936        5.472128
## A1BG-AS1        3.399908        1.696770       3.0854935        3.065144
## A2M             7.333523        6.722100       6.3072421        4.678757
## A2M-AS1         1.586530        1.469455      -0.8459435        2.058809
## A4GALT          3.863119        3.870668       2.6443854        1.143593
## AAAS            4.548070        4.634678       4.1542320        4.100351
##          E_BIOKEY_17_Pre E_BIOKEY_18_Pre T_BIOKEY_19_Pre T_BIOKEY_2_Pre
## A1BG           6.9725245       5.8004299        5.199503      6.6866931
## A1BG-AS1       1.7868431       2.6129625        3.130837      3.4713497
## A2M            6.4869919       8.2738843        7.953555      8.8882832
## A2M-AS1        0.8626157       0.4420466        1.472353     -0.1811935
## A4GALT         3.7296845       4.2644559        3.166750      1.6956410
## AAAS           4.3232065       5.3965006        4.022851      4.0371876
##          E_BIOKEY_20_Pre E_BIOKEY_21_Pre E_BIOKEY_22_Pre H_BIOKEY_23_Pre
## A1BG            1.176529       6.5114793        5.555067       2.8795228
## A1BG-AS1        1.176529       3.3055763        2.045092       5.6808534
## A2M             8.690956       6.8709672        7.109675       7.3724163
## A2M-AS1         1.176529      -0.3552621        1.918203      -0.1942732
## A4GALT          1.176529       5.0117301        3.622746       4.2105114
## AAAS            3.730991       4.0803646        4.632077       3.2842698
##          E_BIOKEY_24_Pre T_BIOKEY_25_Pre T_BIOKEY_26_Pre E_BIOKEY_27_Pre
## A1BG           6.3196452       5.3433102        6.243258       5.9996325
## A1BG-AS1       0.7626526      -0.2560209        2.482995       3.5492414
## A2M            6.7106390       5.2666285        8.619317       7.8014330
## A2M-AS1       -2.0029806      -0.2560209        1.182056       0.9737809
## A4GALT         3.4753680      -0.2560209        2.969126       4.4407770
## AAAS           4.2134547      -0.2560209        3.654162       4.7984305
##          H_BIOKEY_28_Pre E_BIOKEY_29_Pre E_BIOKEY_3_Pre E_BIOKEY_30_Pre
## A1BG            5.168043        6.787897      6.2232016        6.122288
## A1BG-AS1        3.374182        2.672277      3.8918174        2.807408
## A2M             7.392796        6.954365      6.3955437        7.712788
## A2M-AS1        -2.270948        1.015733      0.7360115        2.509067
## A4GALT          2.815316        3.937832      3.5063148        0.719702
## AAAS            4.445411        5.104811      3.8882224        5.323493
##          T_BIOKEY_31_Pre E_BIOKEY_4_Pre E_BIOKEY_5_Pre E_BIOKEY_6_Pre
## A1BG            6.657552      5.7312825      5.7578539      5.6438626
## A1BG-AS1        2.181624      2.8505131      2.7643786     -2.7281067
## A2M             6.274596      3.5410057      0.9604585      6.5644885
## A2M-AS1        -1.681436     -0.4571456      1.4749382     -0.5827321
## A4GALT          5.316876      0.8842042     -1.6077021      3.1398225
## AAAS            3.806920      4.4301778      3.8430954      4.5231817
##          E_BIOKEY_7_Pre T_BIOKEY_8_Pre T_BIOKEY_9_Pre
## A1BG          6.2737109      5.1780177       5.184807
## A1BG-AS1      3.4544844      2.9421822       2.317415
## A2M           5.0657361      3.3681097       8.081296
## A2M-AS1      -0.1963423     -2.2277830      -0.209915
## A4GALT        2.7600000     -0.5087031       4.325937
## AAAS          4.3550455      3.8162667       4.127680
head(expDaton)
##          T_BIOKEY_1_On T_BIOKEY_10_On T_BIOKEY_11_On E_BIOKEY_12_On
## A1BG          5.593694       5.480948      5.8927430       5.967722
## A1BG-AS1      2.519558       2.917420      3.5706021       2.373584
## A2M           8.371064       5.447220      3.8187982       4.274771
## A2M-AS1       1.650927       1.130577      0.2677720       1.728762
## A4GALT        4.643174       1.566061     -0.4592697       1.957988
## AAAS          4.333861       4.284210      4.2789935       4.397291
##          H_BIOKEY_13_On T_BIOKEY_14_On T_BIOKEY_15_On T_BIOKEY_16_On
## A1BG          5.6831548       5.661959       5.682286       5.184911
## A1BG-AS1      1.5285815       1.118120       3.205944       2.382728
## A2M           7.4582249       8.077234       5.691263       6.739329
## A2M-AS1       0.5827826      -2.846790       1.558496       1.426495
## A4GALT        3.8765917       4.213639       2.203444       3.246004
## AAAS          4.7713310       4.329493       4.509878       3.815115
##          E_BIOKEY_17_On E_BIOKEY_18_On T_BIOKEY_19_On T_BIOKEY_2_On
## A1BG           6.300132       5.043385       5.602146     6.4209985
## A1BG-AS1       3.645268       1.661507       3.274227     2.7406479
## A2M            6.450069       7.836474       6.361894     8.7286273
## A2M-AS1        2.261176       0.367614      -1.371798    -0.8633119
## A4GALT         3.680405       4.865045       3.824435     2.8001224
## AAAS           2.897744       4.736787       1.971860     4.4909923
##          E_BIOKEY_20_On E_BIOKEY_21_On E_BIOKEY_22_On H_BIOKEY_23_On
## A1BG          5.2249578      6.3920409       6.095602      6.7085634
## A1BG-AS1      2.6145721      1.8623147       3.344646      4.1013221
## A2M           7.6013998      6.0589374       6.107070      7.6700494
## A2M-AS1      -0.1537773      0.4856339       1.648652      0.7017416
## A4GALT        3.4599911      6.1876960       5.677838      4.0548416
## AAAS          4.4096361      4.7629929       5.098202      4.4447414
##          E_BIOKEY_24_On T_BIOKEY_25_On T_BIOKEY_26_On E_BIOKEY_27_On
## A1BG           5.845190       5.859633      7.4124501       6.431447
## A1BG-AS1       3.426896       4.390649      2.8426537       3.598911
## A2M            8.035704       6.192754      9.1257901       7.543428
## A2M-AS1        2.558571      -1.156028      0.5764085      -1.482937
## A4GALT         4.258148       4.277467      4.6071349       5.377628
## AAAS           5.315897       4.167467      3.9125363       4.866365
##          H_BIOKEY_28_On E_BIOKEY_29_On E_BIOKEY_3_On E_BIOKEY_30_On
## A1BG          6.7142363       6.347347      6.217134      6.5135803
## A1BG-AS1      1.6902329       3.007808      2.647809      3.0530818
## A2M           8.2976905       5.230938      7.024702      7.3640544
## A2M-AS1       0.3200853       3.007808      1.031557      0.5543158
## A4GALT        0.1924999       6.067528      4.242843      1.9288582
## AAAS          3.7705896       3.007808      3.753301      4.6202219
##          T_BIOKEY_31_On E_BIOKEY_4_On E_BIOKEY_5_On E_BIOKEY_6_On E_BIOKEY_7_On
## A1BG          6.4391380      5.317964      6.144400     5.6636118      7.080815
## A1BG-AS1      2.4980394      2.751499      1.069224     3.1347585      2.931139
## A2M           7.3432528      4.655921      6.077872     7.9495697      6.311168
## A2M-AS1       0.3779867      1.631032      1.069224    -0.5050988      2.213239
## A4GALT        4.5948779     -3.979205      1.069224     4.4577645      4.797048
## AAAS          3.9582696      4.231757      5.273033     5.7455365      4.564079
##          T_BIOKEY_8_On T_BIOKEY_9_On
## A1BG         5.6358750      6.613882
## A1BG-AS1     2.7856012      3.072074
## A2M          3.8636246      7.829947
## A2M-AS1      2.4442290     -1.842228
## A4GALT       0.3442736      1.727214
## AAAS         4.3373941      4.530149
expDatpre = expDatpre[,-c(13,16,18,22,29)]
expDaton = expDaton[,-c(13,16,18,22,29)]

# From the expDat object I created expDatpre and expDaton objects, which just contain expression data for the samples' pre-treatment or on-treatment tumours individually.
data(pam50.robust)

expAnnot = matrix(rownames(expDat), ncol=1)
colnames(expAnnot) = "Gene.Symbol"
expAnn = AnnotationDbi::select(org.Hs.eg.db, keys = expAnnot, keytype = "SYMBOL", columns = "ENTREZID")
mt = match(rownames(expDat), expAnn$SYMBOL)
expAnnx = expAnn[mt,]
colnames(expAnnx) = c("Gene.Symbol", "EntrezGene.ID")

expDatpreScale = t(scale(t(expDatpre)))
expDatpreScale[expDatpreScale < -3] = -3
expDatpreScale[expDatpreScale >  3] =  3


PAM50pre = molecular.subtyping(sbt.model = "pam50", data= t(expDatpre),
                                annot=expAnnx, do.mapping=FALSE)

par(mar=c(7,5,2,2))
table(PAM50pre$subtype)
## 
##  Basal   Her2   LumB   LumA Normal 
##      9      3      5      6      3
boxplot(expDatpre["HIST1H2AI",] ~ PAM50pre$subtype, las=3, ylab = "Relative expression ofHIST1H2AI gene", xlab = NULL, cex=0, cex.lab = 1.2, cex.axis=1.2)

mtext('BC subtype', 1, 5, cex = 1.2)

PAM50on = molecular.subtyping(sbt.model = "pam50", data= t(expDaton),
                                annot=expAnnx, do.mapping=FALSE)

PAM50all = molecular.subtyping(sbt.model = "pam50", data= t(expDat),
                                annot=expAnnx, do.mapping=FALSE)

table(PAM50on$subtype)
## 
##  Basal   Her2   LumB   LumA Normal 
##     10      6      4      4      2
boxplot(expDaton["ESR1",] ~ PAM50on$subtype, las=3)

dim(expDatpre)
## [1] 25288    26
table(PAM50on$subtype, PAM50pre$subtype)
##         
##          Basal Her2 LumB LumA Normal
##   Basal      7    0    0    1      2
##   Her2       1    3    1    0      1
##   LumB       0    0    3    1      0
##   LumA       0    0    1    3      0
##   Normal     1    0    0    1      0

Above I performed PAM50 molecular subtyping on the expression data, both pre- and on- treatment. I then compared them, to see how many tumours seemingly ‘changed’ molecular subtype upon anti-PD1 treatment.

pid = sort(unique(brcaDat1@meta.data$patient_id))
pid = pid[-c(13,16,18,22,29)]
pid
##  [1] "BIOKEY_1"  "BIOKEY_10" "BIOKEY_11" "BIOKEY_12" "BIOKEY_13" "BIOKEY_14"
##  [7] "BIOKEY_15" "BIOKEY_16" "BIOKEY_17" "BIOKEY_18" "BIOKEY_19" "BIOKEY_2" 
## [13] "BIOKEY_21" "BIOKEY_22" "BIOKEY_24" "BIOKEY_26" "BIOKEY_27" "BIOKEY_28"
## [19] "BIOKEY_3"  "BIOKEY_30" "BIOKEY_31" "BIOKEY_4"  "BIOKEY_5"  "BIOKEY_6" 
## [25] "BIOKEY_8"  "BIOKEY_9"
cbind(pid, colnames(expDatpre))
##       pid                          
##  [1,] "BIOKEY_1"  "T_BIOKEY_1_Pre" 
##  [2,] "BIOKEY_10" "T_BIOKEY_10_Pre"
##  [3,] "BIOKEY_11" "T_BIOKEY_11_Pre"
##  [4,] "BIOKEY_12" "E_BIOKEY_12_Pre"
##  [5,] "BIOKEY_13" "H_BIOKEY_13_Pre"
##  [6,] "BIOKEY_14" "T_BIOKEY_14_Pre"
##  [7,] "BIOKEY_15" "T_BIOKEY_15_Pre"
##  [8,] "BIOKEY_16" "T_BIOKEY_16_Pre"
##  [9,] "BIOKEY_17" "E_BIOKEY_17_Pre"
## [10,] "BIOKEY_18" "E_BIOKEY_18_Pre"
## [11,] "BIOKEY_19" "T_BIOKEY_19_Pre"
## [12,] "BIOKEY_2"  "T_BIOKEY_2_Pre" 
## [13,] "BIOKEY_21" "E_BIOKEY_21_Pre"
## [14,] "BIOKEY_22" "E_BIOKEY_22_Pre"
## [15,] "BIOKEY_24" "E_BIOKEY_24_Pre"
## [16,] "BIOKEY_26" "T_BIOKEY_26_Pre"
## [17,] "BIOKEY_27" "E_BIOKEY_27_Pre"
## [18,] "BIOKEY_28" "H_BIOKEY_28_Pre"
## [19,] "BIOKEY_3"  "E_BIOKEY_3_Pre" 
## [20,] "BIOKEY_30" "E_BIOKEY_30_Pre"
## [21,] "BIOKEY_31" "T_BIOKEY_31_Pre"
## [22,] "BIOKEY_4"  "E_BIOKEY_4_Pre" 
## [23,] "BIOKEY_5"  "E_BIOKEY_5_Pre" 
## [24,] "BIOKEY_6"  "E_BIOKEY_6_Pre" 
## [25,] "BIOKEY_8"  "T_BIOKEY_8_Pre" 
## [26,] "BIOKEY_9"  "T_BIOKEY_9_Pre"
mt = match(pid, brcaDat1@meta.data$patient_id)
bc_type= brcaDat1@meta.data$BC_type[mt]
table(PAM50pre$subtype, bc_type)
##         bc_type
##          ER+ HER2+ TNBC
##   Basal    0     2    7
##   Her2     1     0    2
##   LumB     5     0    0
##   LumA     6     0    0
##   Normal   0     0    3

I then compared the intrinsic molecular subtypes to the overall BC type,

I tried this code table(PAM50all$subtype.crisp, brcaDat1$BC_type) but I couldn’t get it to work because the arguments weren’t all of the same length. This is because the brcaDat1 object has a row for each sequence, whereas the PAM50 object only has a row for each patient. I should ask Mik about this!

Delta Genes

dim(expDat)
## [1] 25288    52
delta1 = as.matrix(expDat[,1] - expDat[,2])
delta2 = as.matrix(expDat[,23] - expDat[,24])
delta3 = as.matrix(expDat[,37] - expDat[,38])
delta4 = as.matrix(expDat[,43] - expDat[,44])
delta5 = as.matrix(expDat[,45] - expDat[,46])
delta6 = as.matrix(expDat[,47] - expDat[,48])
delta8 = as.matrix(expDat[,49] - expDat[,50])
delta9 = as.matrix(expDat[,51] - expDat[,52])
delta10 = as.matrix(expDat[,3] - expDat[,4])
delta11 = as.matrix(expDat[,5] - expDat[,6])
delta12 = as.matrix(expDat[,7] - expDat[,8])
delta13 = as.matrix(expDat[,9] - expDat[,10])
delta14 = as.matrix(expDat[,11] - expDat[,12])
delta15 = as.matrix(expDat[,13] - expDat[,14])
delta16 = as.matrix(expDat[,15] - expDat[,16])
delta17 = as.matrix(expDat[,17] - expDat[,18])
delta18 = as.matrix(expDat[,19] - expDat[,20])
delta19 = as.matrix(expDat[,21] - expDat[,22])
delta21 = as.matrix(expDat[,25] - expDat[,26])
delta22 = as.matrix(expDat[,27] - expDat[,28])
delta24 = as.matrix(expDat[,29] - expDat[,30])
delta26 = as.matrix(expDat[,31] - expDat[,32])
delta27 = as.matrix(expDat[,33] - expDat[,34])
delta28 = as.matrix(expDat[,35] - expDat[,36])
delta30 = as.matrix(expDat[,39] - expDat[,40])
delta31 = as.matrix(expDat[,41] - expDat[,42])

dim(expDat)
## [1] 25288    52
deltag = cbind(delta1, delta10, delta11, delta12, delta13, delta14, delta15, delta16, delta17, delta18, delta19, delta2, delta21, delta22, delta24, delta26, delta27, delta28, delta3, delta30, delta31, delta4, delta5, delta6, delta8, delta9)
  
colnames(deltag) = c("T_BIOKEY1delta", "T_BIOKEY10delta", "T_BIOKEY11delta", "E_BIOKEY12delta", "H_BIOKEY13delta", "T_BIOKEY14delta", "T_BIOKEY15delta", "T_BIOKEY16delta", "E_BIOKEY17delta", "E_BIOKEY18delta", "T_BIOKEY19delta", "T_BIOKEY2delta", "E_BIOKEY21delta", "E_BIOKEY22delta", "E_BIOKEY24delta", "T_BIOKEY26delta", "E_BIOKEY27delta", "H_BIOKEY28delta", "E_BIOKEY3delta", "E_BIOKEY30delta", "T_BIOKEY31delta", "E_BIOKEY4delta", "E_BIOKEY5delta", "E_BIOKEY6delta", "T_BIOKEY8delta", "T_BIOKEY9delta")

deltaSD = apply(deltag, 1, sd)

deltaMean = apply(deltag, 1, mean)

plot(deltaMean, deltaSD, pch='.')

keep2000 = order(deltaSD, decreasing=TRUE)[1:2000]
head(keep2000)
## [1]  7044  6475  7008 18441  7042  7083
delta2000 = deltag[keep2000,]

deltag[1:5,1:5]
##          T_BIOKEY1delta T_BIOKEY10delta T_BIOKEY11delta E_BIOKEY12delta
## A1BG       0.0836532252     0.003545888       0.4522258      -0.6815619
## A1BG-AS1   0.0008813205    -0.043788828       1.3636316       0.2728971
## A2M        3.5470728193    -0.812956995       0.2421816      -1.3257193
## A2M-AS1   -1.2007507119     0.628368657       0.1083350       0.3717461
## A4GALT     3.3390989679    -0.933972749       0.7811581      -1.3732026
##          H_BIOKEY13delta
## A1BG          -0.3951139
## A1BG-AS1      -1.4877207
## A2M            0.7511477
## A2M-AS1       -0.7835323
## A4GALT         0.1514798
dim(deltag)
## [1] 25288    26

In this section of code I made my deltag object, which consists of the pre-treamtent expression values being subtracted from the on-treatment expression values, for every gene of every tumour. A positive value for a gene’s expression in this object thus means that that gene increased in expression upon treatment, while a negative value means that that gene decreased in expression upon treatment, for that tumour.

I also made a delta2000 object, where I sorted these genes by their standard deviation, and the delta2000 object only includes the top 2000 genes with the highest SDs.

Clustering Delta Genes - original clustering

dim(delta2000)
## [1] 2000   26
hc = hclust(dist(t(delta2000)))
plot(hc, hang = -1)

delta2000_scale = t(apply(delta2000, 1, scale, scale=TRUE))
delta2000_scale[delta2000_scale < -3] = -3
delta2000_scale[delta2000_scale > 3] = 3


table(brcaDat1$patient_id, brcaDat1$BC_type)
##            
##              ER+ HER2+ TNBC
##   BIOKEY_1     0     0 8103
##   BIOKEY_10    0     0 9192
##   BIOKEY_11    0     0 3966
##   BIOKEY_12 6534     0    0
##   BIOKEY_13    0  3712    0
##   BIOKEY_14    0     0 3409
##   BIOKEY_15    0     0 6874
##   BIOKEY_16    0     0 8706
##   BIOKEY_17 3104     0    0
##   BIOKEY_18 3660     0    0
##   BIOKEY_19    0     0 4926
##   BIOKEY_2     0     0 4679
##   BIOKEY_20 2164     0    0
##   BIOKEY_21 3280     0    0
##   BIOKEY_22 2295     0    0
##   BIOKEY_23    0  2764    0
##   BIOKEY_24 2363     0    0
##   BIOKEY_25    0     0  613
##   BIOKEY_26    0     0 5711
##   BIOKEY_27 3491     0    0
##   BIOKEY_28    0  2514    0
##   BIOKEY_29  857     0    0
##   BIOKEY_3  3780     0    0
##   BIOKEY_30 3030     0    0
##   BIOKEY_31    0     0 4581
##   BIOKEY_4  6480     0    0
##   BIOKEY_5  2711     0    0
##   BIOKEY_6  3640     0    0
##   BIOKEY_7  1640     0    0
##   BIOKEY_8     0     0 1405
##   BIOKEY_9     0     0 2809
## I don't think I need these:



## Don't need those!

subtype_cc = c("black", "purple", "lightblue", "blue", "green")[as.numeric(as.factor(PAM50pre$subtype))]
bctype_cc = c("blue", "purple", "black")[as.numeric(as.factor(bc_type))]
table(bc_type, bctype_cc)
##        bctype_cc
## bc_type black blue purple
##   ER+       0   12      0
##   HER2+     0    0      2
##   TNBC     12    0      0

The unsupervised clustering is shown here. I then also scaled the delta2000 object so that all gene expression values are scaled between -3 and +3. I then determine the quantiles of the delta2000 object, determine colours for the molecular subtypes of cancer, as well as the BC type. A heatmap is shown, where the colours are based off their quantiles, and I created a boxplot of this too, as well as a density plot.

what actually is drcg_cols? Ask Mik.

Making cutd

colnames(delta2000_scale) = colnames(delta2000)
deltaclust = hclust(as.dist(1-cor(delta2000_scale)))
deltad = as.dendrogram(deltaclust)
cutd = cutree((deltad),2)
table(cutd)
## cutd
##  1  2 
## 13 13
cutd[1:10]
##  T_BIOKEY1delta T_BIOKEY10delta T_BIOKEY11delta E_BIOKEY12delta H_BIOKEY13delta 
##               1               2               2               2               1 
## T_BIOKEY14delta T_BIOKEY15delta T_BIOKEY16delta E_BIOKEY17delta E_BIOKEY18delta 
##               1               2               1               2               2
par(mar = c(8,3,2,2))
plot(color_branches(deltad,2),leaflab="p", cex = 1, axes=F)

here I performed the unsupervised clustering of the delta2000 object to create the infamous red/green cluster diagram.

04/08/2022: changed the deltaclust object from deltaclust = hclust(as.dist(1-cor(delta2000))) to deltaclust = hclust(as.dist(1-cor(delta2000_scale)))

Expression analysis on deltag

group = rep( c( "PRE", "ON"), c(26,26))
designdeltag = model.matrix(~rep(1, ncol(delta2000)) - 1)


deltaglim = lmFit(delta2000, designdeltag)
deltaglim = eBayes(deltaglim)
ttdeltaglim = topTable(deltaglim, coef=1, adjust.method = "BH", n=nrow(delta2000)) 


ttdeltagsig=ttdeltaglim[ttdeltaglim$adj.P.Val<0.05,]
ttdeltagsig[1:10,]
##              logFC   AveExpr         t      P.Value    adj.P.Val         B
## HBA2     -4.063105 -4.063105 -5.520202 1.801463e-07 0.0002072791 6.7369872
## TUBA3D    3.246357  3.246357  5.489632 2.072791e-07 0.0002072791 6.6124548
## HBA1     -3.288387 -3.288387 -4.816309 4.052127e-06 0.0027014183 3.9785527
## HBB      -3.584036 -3.584036 -4.480017 1.633443e-05 0.0081672166 2.7486406
## HBM      -2.410997 -2.410997 -4.091959 7.512812e-05 0.0300512468 1.4086642
## IGLV3-21 -2.967492 -2.967492 -3.948976 1.287492e-04 0.0372666557 0.9377503
## IGHV4-28 -2.599833 -2.599833 -3.945483 1.304333e-04 0.0372666557 0.9264059
## CSF2     -2.262941 -2.262941 -3.841424 1.914034e-04 0.0478508408 0.5919855
## IGHV3-72 -2.819188 -2.819188 -3.797471 2.245830e-04 0.0499073320 0.4528164
## NA              NA        NA        NA           NA           NA        NA
rownames(ttdeltagsig)
## [1] "HBA2"     "TUBA3D"   "HBA1"     "HBB"      "HBM"      "IGLV3-21" "IGHV4-28"
## [8] "CSF2"     "IGHV3-72"
expDatdeltagsig = expDat[rownames(ttdeltagsig),]
heatmap.2(expDatdeltagsig, trace='none', scale='none', col=bluered, Rowv=T, Colv=T, dendrogram = 'both')

deltagsig = deltag[rownames(ttdeltagsig),]
heatmap.2(deltagsig, trace='none', scale='none', col=bluered, Rowv=T, Colv=T, dendrogram = 'both')

Deltag analysis performs limma analysis on all delta genes (on-pre), and then filters this for ones with an adjusted p-value over 0.05. To explore the original expression values better, I have made a heatmap of the raw expression values for these filtered genes.

Sample Data

sampleData = brcaDat1@meta.data[,-c(1:4,9:15),] %>% unique()
head(sampleData)
sampleData
sampleData = sampleData[-c(11:12,33:34,37:38,41:42,51:52),]
cbind(colnames(expDat), sampleData)

sampleDataAll = dplyr::arrange(sampleData, patient_id, desc(timepoint))
cbind(colnames(expDat), sampleDataAll)
rownames(sampleDataAll) = paste0(sampleDataAll$patient_id, "_", sampleDataAll$timepoint)

sampleData3 = brcaDat1@meta.data[,-c(1:4,6,9:15),] %>% unique()
sampleData3 = sampleData3[-c(6,17,19,21,26),]
rownames(sampleData3) = paste0(sampleData3$patient_id, "_", "Pre")
cbind(colnames(expDatpre), sampleData3)
sampleDataPatient = dplyr::arrange(sampleData3, patient_id)
cbind(colnames(expDatpre), sampleDataPatient)
cbind(colnames(delta2000_scale), sampleDataPatient)

Here I created the sampleDataPatient object; a matrix showing the patient ID, expansion, and BC type.

Heatmap.3

pidx = sort(sampleData3$patient_id)
cbind(pidx, colnames(expDatpre))
##       pidx                         
##  [1,] "BIOKEY_1"  "T_BIOKEY_1_Pre" 
##  [2,] "BIOKEY_10" "T_BIOKEY_10_Pre"
##  [3,] "BIOKEY_11" "T_BIOKEY_11_Pre"
##  [4,] "BIOKEY_12" "E_BIOKEY_12_Pre"
##  [5,] "BIOKEY_13" "H_BIOKEY_13_Pre"
##  [6,] "BIOKEY_14" "T_BIOKEY_14_Pre"
##  [7,] "BIOKEY_15" "T_BIOKEY_15_Pre"
##  [8,] "BIOKEY_16" "T_BIOKEY_16_Pre"
##  [9,] "BIOKEY_17" "E_BIOKEY_17_Pre"
## [10,] "BIOKEY_18" "E_BIOKEY_18_Pre"
## [11,] "BIOKEY_19" "T_BIOKEY_19_Pre"
## [12,] "BIOKEY_2"  "T_BIOKEY_2_Pre" 
## [13,] "BIOKEY_21" "E_BIOKEY_21_Pre"
## [14,] "BIOKEY_22" "E_BIOKEY_22_Pre"
## [15,] "BIOKEY_24" "E_BIOKEY_24_Pre"
## [16,] "BIOKEY_26" "T_BIOKEY_26_Pre"
## [17,] "BIOKEY_27" "E_BIOKEY_27_Pre"
## [18,] "BIOKEY_28" "H_BIOKEY_28_Pre"
## [19,] "BIOKEY_3"  "E_BIOKEY_3_Pre" 
## [20,] "BIOKEY_30" "E_BIOKEY_30_Pre"
## [21,] "BIOKEY_31" "T_BIOKEY_31_Pre"
## [22,] "BIOKEY_4"  "E_BIOKEY_4_Pre" 
## [23,] "BIOKEY_5"  "E_BIOKEY_5_Pre" 
## [24,] "BIOKEY_6"  "E_BIOKEY_6_Pre" 
## [25,] "BIOKEY_8"  "T_BIOKEY_8_Pre" 
## [26,] "BIOKEY_9"  "T_BIOKEY_9_Pre"
mt = match(pidx, sampleDataPatient$patient_id)

bc_type= sampleDataPatient$BC_type[mt]
sampleData3$BC_type[mt]
##  [1] "HER2+" "TNBC"  "TNBC"  "TNBC"  "TNBC"  "TNBC"  "HER2+" "ER+"   "TNBC" 
## [10] "TNBC"  "ER+"   "ER+"   "ER+"   "TNBC"  "TNBC"  "ER+"   "ER+"   "ER+"  
## [19] "TNBC"  "ER+"   "TNBC"  "TNBC"  "ER+"   "ER+"   "ER+"   "ER+"
expan = unique(brcaDat1@meta.data$expansion)

#testline haha

expan_cc = c("pink", "purple", "green")[sampleDataPatient$expansion]
expan_cc[1:10]
##  [1] NA NA NA NA NA NA NA NA NA NA
expan_cc = as.matrix(expan_cc)
expant = as.numeric(sampleDataPatient$expansion)
## Warning: NAs introduced by coercion
table(rownames(sampleData3), sampleData3$expansion)
##                
##                 E n/a NE
##   BIOKEY_1_Pre  0   1  0
##   BIOKEY_10_Pre 1   0  0
##   BIOKEY_11_Pre 1   0  0
##   BIOKEY_12_Pre 1   0  0
##   BIOKEY_13_Pre 0   1  0
##   BIOKEY_14_Pre 0   0  1
##   BIOKEY_15_Pre 1   0  0
##   BIOKEY_16_Pre 1   0  0
##   BIOKEY_17_Pre 0   0  1
##   BIOKEY_18_Pre 1   0  0
##   BIOKEY_19_Pre 0   0  1
##   BIOKEY_2_Pre  0   0  1
##   BIOKEY_21_Pre 0   0  1
##   BIOKEY_22_Pre 0   0  1
##   BIOKEY_24_Pre 0   0  1
##   BIOKEY_26_Pre 0   0  1
##   BIOKEY_27_Pre 0   0  1
##   BIOKEY_28_Pre 1   0  0
##   BIOKEY_3_Pre  0   0  1
##   BIOKEY_30_Pre 0   0  1
##   BIOKEY_31_Pre 1   0  0
##   BIOKEY_4_Pre  0   0  1
##   BIOKEY_5_Pre  1   0  0
##   BIOKEY_6_Pre  0   0  1
##   BIOKEY_8_Pre  0   0  1
##   BIOKEY_9_Pre  0   0  1
subtype_cc = c("black", "purple", "lightblue", "blue", "green")[as.numeric(as.factor(PAM50pre$subtype))]

bctype_cc = c("blue", "purple", "black")[as.numeric(as.factor(sampleDataPatient$BC_type))]

sampleDataPatient$BC_type[1:10]
##  [1] "TNBC"  "TNBC"  "TNBC"  "ER+"   "HER2+" "TNBC"  "TNBC"  "TNBC"  "ER+"  
## [10] "ER+"
clustergroups_cc = c("pink", "purple")[cutd]
matrix_cc = rbind(subtype_cc, clustergroups_cc, bctype_cc)
rownames(matrix_cc) = c("Intrinsic Subtypes", "Cluster Groups", "BC Type")
matrix_cc = t(matrix_cc)
dim(matrix_cc)

dim(delta2000_scale)

rownames(matrix_cc) = colnames(delta2000_scale)
matrix_ccnona = matrix_cc[-c(1,5),]
dim(matrix_ccnona)
heatmap.3(delta2000_scale, trace='none', scale='none', col=bluered(50),
            ColSideColors = matrix_cc, margins=c(7,4),labCol = colnames(delta2000_scale), 
          distfun = function(x) as.dist(1-cor(t(x))))

table(brcaDat1@meta.data$BC_type, brcaDat1@meta.data$patient_id)
##        
##         BIOKEY_1 BIOKEY_10 BIOKEY_11 BIOKEY_12 BIOKEY_13 BIOKEY_14 BIOKEY_15
##   ER+          0         0         0      6534         0         0         0
##   HER2+        0         0         0         0      3712         0         0
##   TNBC      8103      9192      3966         0         0      3409      6874
##        
##         BIOKEY_16 BIOKEY_17 BIOKEY_18 BIOKEY_19 BIOKEY_2 BIOKEY_20 BIOKEY_21
##   ER+           0      3104      3660         0        0      2164      3280
##   HER2+         0         0         0         0        0         0         0
##   TNBC       8706         0         0      4926     4679         0         0
##        
##         BIOKEY_22 BIOKEY_23 BIOKEY_24 BIOKEY_25 BIOKEY_26 BIOKEY_27 BIOKEY_28
##   ER+        2295         0      2363         0         0      3491         0
##   HER2+         0      2764         0         0         0         0      2514
##   TNBC          0         0         0       613      5711         0         0
##        
##         BIOKEY_29 BIOKEY_3 BIOKEY_30 BIOKEY_31 BIOKEY_4 BIOKEY_5 BIOKEY_6
##   ER+         857     3780      3030         0     6480     2711     3640
##   HER2+         0        0         0         0        0        0        0
##   TNBC          0        0         0      4581        0        0        0
##        
##         BIOKEY_7 BIOKEY_8 BIOKEY_9
##   ER+       1640        0        0
##   HER2+        0        0        0
##   TNBC         0     1405     2809

In this section I created a heatmap.3, showing, for all samples, their cancer type, intrinsic molecular subtype, and whether they expanded on treatment or not. (ask mik to help with expan)

Finding Es or NEs

design = model.matrix(~rep(1, ncol(deltag)) - 1)
design[1:10]
##  [1] 1 1 1 1 1 1 1 1 1 1
designcutd = model.matrix(~cutd)
designcutd[1:10]
##  [1] 1 1 1 1 1 1 1 1 1 1
sampleDataPatientnona = sampleDataPatient[c(2:4, 6:26),]
delta2000nona = delta2000[, c(2:4, 6:26)]

expan = sampleDataPatientnona$expansion

expan = as.matrix(expan)
expanx = ifelse(sampleDataPatientnona$expansion=="E",1,2)
expanx = as.matrix(expanx)
expanx[1:10]
##  [1] 1 1 1 2 1 1 2 1 2 2
rownames(expanx) = rownames(sampleDataPatientnona)
designexpanrownames = sort(rownames(expanx))

designexpan = model.matrix(~expanx)
designexpan[1:10]
##  [1] 1 1 1 1 1 1 1 1 1 1
rownames(designexpan) = designexpanrownames
colnames(designexpan) = c("Intercept", "NEorNAis2")

expanlim = lmFit(delta2000nona, designexpan)
expanlim = eBayes(expanlim)
ttexpan = topTable(expanlim, coef=2, adjust.method = "BH", n=nrow(delta2000))
ttexpan[1:10,]
##                logFC     AveExpr         t      P.Value adj.P.Val            B
## AL031733.2 -4.325872  1.35067013 -4.133936 0.0001052666 0.1508778  0.813653649
## IGLV3-19   -5.759697 -1.25978520 -3.922705 0.0002154659 0.1508778  0.258196309
## TRBV15     -4.873191 -1.19771268 -3.852849 0.0002719043 0.1508778  0.077922814
## TRBV11-3   -4.872315  0.66476707 -3.821357 0.0003017555 0.1508778 -0.002763729
## IGHV1-46   -5.026703 -1.17420663 -3.440615 0.0010243851 0.3316939 -0.947324970
## IGKV3D-20  -5.085056 -1.12172579 -3.408367 0.0011323679 0.3316939 -1.024532117
## TRAV8-1    -4.415424 -1.32769500 -3.387458 0.0012080421 0.3316939 -1.074343067
## EGR4       -3.803442  0.33185909 -3.333703 0.0014251233 0.3316939 -1.201493575
## AC104971.3 -3.779747  0.06468157 -3.318561 0.0014926226 0.3316939 -1.237072774
## CR2        -3.844174  1.01826237 -3.215965 0.0020356824 0.3750220 -1.475315207
ttexpan200 = rownames(ttexpan[1:200,])


# heatmap.3(ttexpan, trace='none', scale='none', col=bluered(50), ColSideColors = matrix_ccnona, margins=c(7,4),labCol = #colnames(delta2000_scalenona),distfun = function(x) as.dist(1-cor(t(x))))

dim(designexpan)
## [1] 24  2
matrix_cc
##                 Intrinsic Subtypes Cluster Groups BC Type 
## T_BIOKEY1delta  "black"            "pink"         "black" 
## T_BIOKEY10delta "purple"           "purple"       "black" 
## T_BIOKEY11delta "purple"           "purple"       "black" 
## E_BIOKEY12delta "lightblue"        "purple"       "blue"  
## H_BIOKEY13delta "black"            "pink"         "purple"
## T_BIOKEY14delta "green"            "pink"         "black" 
## T_BIOKEY15delta "black"            "purple"       "black" 
## T_BIOKEY16delta "black"            "pink"         "black" 
## E_BIOKEY17delta "lightblue"        "purple"       "blue"  
## E_BIOKEY18delta "blue"             "purple"       "blue"  
## T_BIOKEY19delta "green"            "pink"         "black" 
## T_BIOKEY2delta  "black"            "purple"       "black" 
## E_BIOKEY21delta "lightblue"        "pink"         "blue"  
## E_BIOKEY22delta "blue"             "pink"         "blue"  
## E_BIOKEY24delta "blue"             "pink"         "blue"  
## T_BIOKEY26delta "black"            "pink"         "black" 
## E_BIOKEY27delta "blue"             "pink"         "blue"  
## H_BIOKEY28delta "black"            "purple"       "purple"
## E_BIOKEY3delta  "purple"           "pink"         "blue"  
## E_BIOKEY30delta "blue"             "purple"       "blue"  
## T_BIOKEY31delta "black"            "purple"       "black" 
## E_BIOKEY4delta  "lightblue"        "purple"       "blue"  
## E_BIOKEY5delta  "lightblue"        "pink"         "blue"  
## E_BIOKEY6delta  "blue"             "purple"       "blue"  
## T_BIOKEY8delta  "green"            "purple"       "black" 
## T_BIOKEY9delta  "black"            "pink"         "black"
dim(delta2000_scale)
## [1] 2000   26
delta2000_scalenona = delta2000_scale[,-c(1,5)]

In this differential expression analysis I am comparing, from all samples, those that did have TCR expansion upon treatment vs those that did not have TCR expansion upon treatment.

library(reactome.db)
reactome()
## Quality control information for reactome:
## 
## 
## This package has the following mappings:
## 
## reactomeEXTID2PATHID has 52575 mapped keys (of 52575 keys)
## reactomeGO2REACTOMEID has 1435 mapped keys (of 1435 keys)
## reactomePATHID2EXTID has 16760 mapped keys (of 16760 keys)
## reactomePATHID2NAME has 21323 mapped keys (of 21323 keys)
## reactomePATHNAME2ID has 21292 mapped keys (of 21292 keys)
## reactomeREACTOMEID2GO has 13963 mapped keys (of 13963 keys)
## 
## 
## Additional Information about this package:
## 
## DB schema: REACTOME_DB
## DB schema version: 79
lapply( as.list(reactomePATHID2EXTID)[1:2], head )
## $`R-HSA-109582`
## [1] "1"     "10019" "10112" "10125" "10125" "1017" 
## 
## $`R-HSA-114608`
## [1] "1"     "10184" "10257" "10447" "10487" "10490"
as.list(reactomePATHID2NAME)[1:2]
## $`R-BTA-73843`
## [1] "1-diphosphate: 5-Phosphoribose"
## 
## $`R-BTA-1971475`
## [1] "Bos taurus\r: A tetrasaccharide linker sequence is required for GAG synthesis"
library(org.Hs.eg.db)
ann = AnnotationDbi::select(org.Hs.eg.db, keys=ttexpan200,
keytype="SYMBOL", columns="ENTREZID") 

library(ReactomePA)
sigEntrez = ann$ENTREZID
sigEntrez[1:10]
##  [1] NA      "28797" "28572" "28580" "28465" "28874" "28685" "1961"  NA     
## [10] "1380"
rPAoverrep = enrichPathway(gene=sigEntrez, organism = "human", pvalueCutoff=0.1, readable=T)

rpasig = rPAoverrep[rPAoverrep@result$p.adjust<0.05,]
rpasig %>% as.data.frame() %>% knitr::kable()
ID Description GeneRatio BgRatio pvalue p.adjust qvalue geneID Count
rpasigid = rpasig$geneID

rpasigidcat = cat(rpasigid, sep="\n")

Differential expression analysis (ER+ NE vs ER+ E)

sDPernona = sampleDataPatientnona[c(3,7,8,11:13,15,17,18,20:22),]
delta2000ernona = delta2000nona[, c(3,7,8,11:13,15,17,18,20:22)]

expanernona = sDPernona$expansion

expanernona = as.matrix(expanernona)
expanxernona = ifelse(sDPernona$expansion=="E",1,2)
expanxernona = as.matrix(expanxernona)
rownames(expanxernona) = rownames(sDPernona)
designexpanrownamesernona = sort(rownames(expanxernona))

designexpanernona = model.matrix(~expanxernona)
rownames(designexpanernona) = designexpanrownamesernona
colnames(designexpanernona) = c("Intercept", "NEis2")

expanlimernona = lmFit(delta2000ernona, designexpanernona)
expanlimernona = eBayes(expanlimernona)
ttexpanernona = topTable(expanlimernona, coef=2, adjust.method = "BH", n=nrow(delta2000ernona))
ttexpanernona[1:10,]
##                logFC     AveExpr         t      P.Value adj.P.Val          B
## IGLV3-19   -9.172286 -2.51432632 -4.291132 0.0002653457 0.2885612 -0.3049814
## IGHV2-26   -6.344066  0.66816235 -4.230732 0.0003087585 0.2885612 -0.4027982
## HBB        10.298148 -3.13445924  4.083010 0.0004470476 0.2885612 -0.6429953
## AC129492.1 -5.980862 -0.40993195 -3.889357 0.0007250294 0.2885612 -0.9593849
## IGKV1-6    -8.408806 -0.05654878 -3.866863 0.0007667950 0.2885612 -0.9962075
## IGLV2-8    -9.337660 -1.62935994 -3.818089 0.0008656835 0.2885612 -1.0760764
## AC079341.1 -5.242461  0.92919376 -3.712145 0.0011258311 0.3216660 -1.2496237
## RN7SL832P  -5.463674 -0.77891718 -3.344216 0.0027756161 0.6939040 -1.8504523
## CYP2F1     -4.591541  0.88294177 -3.263862 0.0033711737 0.7491497 -1.9806821
## ACTA1       4.941371  0.66435753  3.140075 0.0045375758 0.9075152 -2.1801383
dim(ttexpanernona)
## [1] 2000    6
volcanoplot(expanlimernona, coef = 2, xlab = "log2FC", ylab = "-log10P_value")

par(mfrow=c(3,4))
for(i in rpasigid) boxplot( expDat[i,] ~ rep(c("O", "P"), 26) * rep(cutd,rep(2,length(cutd))),
                         main=i)

for(i in rpasigid) boxplot(deltag[i,] ~ rep(c("Clustered groups"), 26) * sampleDataPatient$BC_type)


boxplot(deltag["IGLV2-8",] ~ rep(c("O"), 26) * sampleDataPatient$BC_type)

This analysis performs a differential expression analysis of the ER+ samples, comparing those that had TCR expansion to those that did not have TCR expansion. I then also started to investigate this IGLV2-8 gene, as well as beginning investigation of the rpasigid object, which contains the geneIDs of the statistically significantly overrepresented genes from GSEA.

Testing Mik’s FN14 code

library(ggplot2)

DimPlot(brcaDat1, reduction = "tsne", label = F, label.box = F, label.size = 4, repel = F, group.by = "cellType", raster = F, split.by = "BC_type")

DimPlot(brcaDat1, reduction = "tsne", label = F, label.box = F, label.size = 4, repel = F, group.by = "CHETAH", raster = F, split.by = "BC_type")

table(brcaDat1@meta.data$CHETAH)
## 
##           CAF    CD4 T cell    CD8 T cell     Dendritic   Endothelial 
##          8780           894           817            15           725 
##    Macrophage          Mast Myofibroblast            NK         Node1 
##           113             1           174           158          9526 
##        Node10         Node2         Node3         Node4         Node5 
##          1073          1835          5525           714          3534 
##         Node6         Node7         Node8         Node9        Plasma 
##           616            14            37          1886           235 
##   reg. T cell    Unassigned 
##           513         85808
prop.table(table(brcaDat1@meta.data$CHETAH),1)
## 
##           CAF    CD4 T cell    CD8 T cell     Dendritic   Endothelial 
##             1             1             1             1             1 
##    Macrophage          Mast Myofibroblast            NK         Node1 
##             1             1             1             1             1 
##        Node10         Node2         Node3         Node4         Node5 
##             1             1             1             1             1 
##         Node6         Node7         Node8         Node9        Plasma 
##             1             1             1             1             1 
##   reg. T cell    Unassigned 
##             1             1
FeaturePlot(brcaDat1, features = c("PDCD1"), reduction = 'tsne', max.cutoff = 3, ncol = 3, split.by = "BC_type")

kp = brcaDat1@meta.data[["cellType"]]=="Cancer_cell"
FeaturePlot(brcaDat1, features = c("PDCD1"), reduction = 'tsne', max.cutoff = 3, ncol = 3, split.by = "BC_type", cells = which(kp))

FeaturePlot(brcaDat1, features = c("IQSEC2"), 
                         reduction = 'tsne', max.cutoff = 3, ncol = 3, split.by = "BC_type")

fn14 = brcaDat1@assays$RNA["CD163L1",] %>%  as.numeric()
cell_type_tab = table(brcaDat1@meta.data[["cellType"]], fn14>0)
prop = round(prop.table(cell_type_tab, 1)[,2],3)
knitr::kable(cbind(cell_type_tab, prop), col.names = c("FN14=0", "FN14>0", "Proportion"))
FN14=0 FN14>0 Proportion
B_cell 10033 15 0.001
Cancer_cell 29234 838 0.028
Endothelial_cell 4256 40 0.009
Fibroblast 17746 225 0.013
Mast_cell 721 0 0.000
Myeloid_cell 10737 320 0.029
pDC 705 0 0.000
T_cell 48074 49 0.001
data.frame(fn14, 
           cellType = brcaDat1@meta.data[["cellType"]], 
           bcType = brcaDat1@meta.data[["BC_type"]], 
           trt = brcaDat1@meta.data[["timepoint"]], 
           patID = brcaDat1@meta.data[["patient_id"]],
           exp = brcaDat1@meta.data[["expansion"]]) %>% 
  dplyr::filter(fn14>0, cellType == "Cancer_cell", bcType!="HER2+") %>% 
  ggplot(., aes(x=patID, y=fn14)) + 
  geom_boxplot(outlier.alpha=0) +
  geom_jitter(size=0.1) +
  facet_wrap(~exp, scales = 'free_x') + xlab("") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
  ggtitle("FN14 in fibroblasts only")

data.frame(fn14, 
           cellType = brcaDat1@meta.data[["cellType"]], 
           bcType = brcaDat1@meta.data[["BC_type"]], 
           trt = brcaDat1@meta.data[["timepoint"]], 
           patID = brcaDat1@meta.data[["patient_id"]],
           exp = brcaDat1@meta.data[["expansion"]]) %>% 
  dplyr::filter(fn14>0, cellType == "Cancer_cell", bcType=="TNBC", exp!="n/a") %>% 
  ggplot(., aes(x=patID, y=fn14, fill=trt)) + 
  geom_boxplot(outlier.alpha=0) +
  geom_point(size=0.1, position=position_jitterdodge(jitter.width=0.12)) +
  facet_wrap(~exp, scales = 'free_x') + xlab("") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

data.frame(fn14, 
           cellType = brcaDat1@meta.data[["cellType"]], 
           bcType = brcaDat1@meta.data[["BC_type"]], 
           trt = brcaDat1@meta.data[["timepoint"]]) %>% 
  dplyr::filter(fn14>0) %>% 
  ggplot(., aes(x=cellType, y=fn14, fill=bcType), base_size=5) + 
  geom_boxplot(outlier.alpha=0) + 
  geom_point(size=0.1, position=position_jitterdodge(jitter.width=0.12)) + xlab("Cell Type") + ylab("CD163L1 expression")

This code chunk is sort of just a test, I’m just playing around with graphical presentations. I should explore this more to get some cool diagrams for my thesis!

BEGINNING IMPORTANT E VS NE ANALYSES:

ER+ pre-treatment E vs NE samples

sDPpreer = sampleDataPatient[c(4,9:10,13:15,17,19:20,22:24),]
expDatpreer = expDatpre[,c(4,9:10,13:15,17,19:20,22:24)]

expanpreer = sDPpreer$expansion  

expanpreer = as.matrix(expanpreer)
expanpreer = ifelse(sDPpreer$expansion=="E",1,2)
expanpreer = as.matrix(expanpreer)
rownames(expanpreer) = rownames(sDPpreer)
designexpanrownamespreer = sort(rownames(expanpreer))

designexpanpreer = model.matrix(~expanpreer)
rownames(designexpanpreer) = designexpanrownamespreer
colnames(designexpanpreer) = c("Intercept", "NEis2")

expanlimpreer = lmFit(expDatpreer, designexpanpreer)
expanlimpreer = eBayes(expanlimpreer)
ttexpanpreer = topTable(expanlimpreer, coef=2, adjust.method = "BH", n=nrow(expDatpreer))
ttexpanpreer[1:10,]
##                logFC    AveExpr         t      P.Value   adj.P.Val         B
## CD163L1     5.582422  1.3117436  9.456627 3.469769e-07 0.008774353 -1.094997
## AC025283.2  4.320308  0.3651582  7.551229 4.221597e-06 0.053377866 -1.429583
## KRT17       6.866016  3.4697135  5.953387 4.827880e-05 0.354111657 -1.875140
## CDKL2      -3.767975 -1.8340165 -5.772432 6.503038e-05 0.354111657 -1.939019
## RPP40       3.292151  1.8815138  5.727983 7.001575e-05 0.354111657 -1.955201
## TACSTD2     3.156901  5.0175682  5.583690 8.915770e-05 0.375769987 -2.009105
## DAPL1       4.283263  1.3820517  5.224851 1.646792e-04 0.503400586 -2.152675
## AL731557.1 -3.486447 -1.9436841 -5.183956 1.768059e-04 0.503400586 -2.169935
## PRSS16      3.374282  1.3985308  5.165114 1.827039e-04 0.503400586 -2.177950
## CHRNA7     -3.397043 -1.5587374 -5.115991 1.990670e-04 0.503400586 -2.199038
ttexpanpreer[1:10,]
##                logFC    AveExpr         t      P.Value   adj.P.Val         B
## CD163L1     5.582422  1.3117436  9.456627 3.469769e-07 0.008774353 -1.094997
## AC025283.2  4.320308  0.3651582  7.551229 4.221597e-06 0.053377866 -1.429583
## KRT17       6.866016  3.4697135  5.953387 4.827880e-05 0.354111657 -1.875140
## CDKL2      -3.767975 -1.8340165 -5.772432 6.503038e-05 0.354111657 -1.939019
## RPP40       3.292151  1.8815138  5.727983 7.001575e-05 0.354111657 -1.955201
## TACSTD2     3.156901  5.0175682  5.583690 8.915770e-05 0.375769987 -2.009105
## DAPL1       4.283263  1.3820517  5.224851 1.646792e-04 0.503400586 -2.152675
## AL731557.1 -3.486447 -1.9436841 -5.183956 1.768059e-04 0.503400586 -2.169935
## PRSS16      3.374282  1.3985308  5.165114 1.827039e-04 0.503400586 -2.177950
## CHRNA7     -3.397043 -1.5587374 -5.115991 1.990670e-04 0.503400586 -2.199038
ttexpanpreer[5793,]
##         logFC  AveExpr        t   P.Value adj.P.Val         B
## JUP 0.7290147 5.686567 1.063748 0.3068324 0.9629482 -4.648902
ttexpanpreersig=ttexpanpreer[ttexpanpreer$adj.P.Val<0.05,]
ttexpanpreersig[1:10,]
##            logFC  AveExpr        t      P.Value   adj.P.Val         B
## CD163L1 5.582422 1.311744 9.456627 3.469769e-07 0.008774353 -1.094997
## NA            NA       NA       NA           NA          NA        NA
## NA.1          NA       NA       NA           NA          NA        NA
## NA.2          NA       NA       NA           NA          NA        NA
## NA.3          NA       NA       NA           NA          NA        NA
## NA.4          NA       NA       NA           NA          NA        NA
## NA.5          NA       NA       NA           NA          NA        NA
## NA.6          NA       NA       NA           NA          NA        NA
## NA.7          NA       NA       NA           NA          NA        NA
## NA.8          NA       NA       NA           NA          NA        NA
rownames(ttexpanpreersig)
## [1] "CD163L1"
##

match("IGLV2-8", rownames(ttexpanpreer))
## [1] 3969
match("PTN", rownames(ttexpanpreer))
## [1] 786
match("IQSEC2", rownames(ttexpanpreer))
## [1] 130
ttexpanpreer[130,]
##          logFC  AveExpr        t     P.Value adj.P.Val        B
## IQSEC2 3.54023 2.567533 3.337024 0.005362782 0.9629482 -3.16074
match("HIST1H2AI", rownames(ttexpanpreer))
## [1] 5067
ttexpanpreer[5067,]
##               logFC   AveExpr        t   P.Value adj.P.Val         B
## HIST1H2AI -1.552631 -1.279491 -1.15892 0.2673656 0.9629482 -4.601018
match("CD247", rownames(ttexpanpreer))
## [1] 5620
ttexpanpreer[5620,]
##           logFC  AveExpr         t   P.Value adj.P.Val         B
## CD247 -1.202195 5.162475 -1.087644 0.2965364 0.9629482 -4.637164
match("SLC4A1", rownames(ttexpanpreer))
## [1] 578
ttexpanpreer[578,]
##            logFC   AveExpr         t    P.Value adj.P.Val         B
## SLC4A1 -1.877952 -2.596204 -2.359828 0.03461331 0.9629482 -3.829772
match("AL031733.2", rownames(ttexpanpreer))
## [1] 16319
ttexpanpreer[match("AL031733.2", rownames(ttexpanpreer)),]
##                 logFC   AveExpr          t   P.Value adj.P.Val         B
## AL031733.2 -0.1906197 -3.018037 -0.3001253 0.7688341 0.9629482 -4.901747
ttexpanpreer[match("STX8", rownames(ttexpanpreer)),]
##          logFC  AveExpr        t   P.Value adj.P.Val         B
## STX8 0.6671938 5.151681 1.579404 0.1382944 0.9629482 -4.358354
topgene = match(rownames(ttexpanpreer)[1], rownames( expDatpre))
ttest = t.test (expDatpreer[topgene,]~designexpanpreer[,2])
ttest
## 
##  Welch Two Sample t-test
## 
## data:  expDatpreer[topgene, ] by designexpanpreer[, 2]
## t = -8.8635, df = 3.0186, p-value = 0.002955
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -7.579846 -3.584997
## sample estimates:
## mean in group 1 mean in group 2 
##       -2.875073        2.707349
save(list = "expanlimpreer", file = "expanlimpreer.Rdata")

This statistical analysis repeats the above ER+ E vs ER+ NE, but this analysis only uses the pre-treatment samples, whereas the last analysis used the delta2000. This means that results from this analysis may better help in predictive values for anti-PD1 response in ER+ BC.

From the t-test, it can be seen that the mean in group 1 (E) has an expression of -2.36, while that of group 2 (NE) is 2.71

GSEA er+ pre E vs NE

library("DOSE")
library("clusterProfiler")

ttexpanpreernames = rownames(ttexpanpreer)
##

expanlimpreerstats = expanlimpreer$t[,2]
expanlimpreerstats %>% sort(., decreasing=TRUE) %>% head()
##    CD163L1 AC025283.2      KRT17      RPP40    TACSTD2      DAPL1 
##   9.456627   7.551229   5.953387   5.727983   5.583690   5.224851
expanlimpreerallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpreerstats), keytype="SYMBOL", columns="ENTREZID")

names(expanlimpreerstats) = expanlimpreerallEntrez$ENTREZID[ match(names(expanlimpreerstats), expanlimpreerallEntrez$SYMBOL)]
 

expanlimpreerstats %>% sort(., decreasing=TRUE) %>% head()
##   283316     <NA>     3872    10799     4070    92196 
## 9.456627 7.551229 5.953387 5.727983 5.583690 5.224851
expanlimpreerstats_sort = sort(expanlimpreerstats, decreasing=TRUE)
 
expanlimpreerrPAgsea = gseGO(expanlimpreerstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (20.32% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpreerrPAgsea[1:5,] %>% knitr::kable()
ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank leading_edge core_enrichment
GO:2000027 GO:2000027 regulation of animal organ morphogenesis 128 0.5486568 2.300298 0.0001673 0.0020584 0.0012012 3423 tags=41%, list=14%, signal=36% 4070/9863/7481/6092/6091/9620/3082/2246/57216/6591/144165/57664/7422/2719/4920/1601/9241/367/219736/7474/2252/1855/128272/59277/166336/2535/7476/2239/64750/6662/80320/23213/10252/652/6423/9500/1952/6422/655/7490/7472/79864/10818/8324/30812/8321/27031/26585/6495/57154/55727/22881/10082
GO:0045216 GO:0045216 cell-cell junction organization 198 0.5054890 2.244662 0.0001614 0.0020584 0.0012012 4866 tags=41%, list=19%, signal=33% 9076/1525/153562/1364/1366/1365/7481/9073/79778/1829/100506658/6714/1436/64065/6591/928/8650/7422/8443/50848/5584/1500/7042/143098/1009/1948/57662/999/80139/85302/5872/1495/5317/10207/7043/8777/7122/9253/55243/4641/286676/1832/23396/222256/7414/4771/6833/91862/421/57555/7368/7082/29841/2706/8502/654/79191/2697/3728/3553/5010/23136/8642/79629/5318/187/79987/9231/4867/1003/10979/389/5657/83700/1894/6722/402/24146/83660/1501/1000
GO:0032963 GO:0032963 collagen metabolic process 91 0.5598317 2.232985 0.0001709 0.0020584 0.0012012 4745 tags=48%, list=19%, signal=39% 55214/55323/4327/79148/4239/54206/4323/5159/1514/1289/7043/5045/11117/1471/90993/2191/9902/7349/57333/4320/1278/871/652/1277/1513/861/4314/8510/114757/8751/9697/4880/4318/55512/4312/51430/4313/5653/63894/5657/10893/23371/5467/5645
GO:0060688 GO:0060688 regulation of morphogenesis of a branching structure 50 0.6217732 2.223879 0.0001768 0.0020584 0.0012012 3588 tags=52%, list=14%, signal=45% 4070/3082/6591/7422/9241/9175/367/7474/2252/59277/2263/6662/4192/23213/652/9500/120892/6422/655/2099/7472/30812/26585/6495/55727/6474
GO:0030199 GO:0030199 collagen fibril organization 59 0.6011415 2.219806 0.0001756 0.0020584 0.0012012 4258 tags=56%, list=17%, signal=47% 2331/81792/7042/4015/1281/4921/1290/30008/1289/1805/11117/10491/60681/1303/165/4060/84171/4320/1278/871/6423/1277/4017/84695/7837/1311/4016/26585/1545/50509/63894/7092/7373
expanlimpreerrPAgsea[1:5,]
##                    ID                                          Description
## GO:2000027 GO:2000027             regulation of animal organ morphogenesis
## GO:0045216 GO:0045216                      cell-cell junction organization
## GO:0032963 GO:0032963                           collagen metabolic process
## GO:0060688 GO:0060688 regulation of morphogenesis of a branching structure
## GO:0030199 GO:0030199                         collagen fibril organization
##            setSize enrichmentScore      NES       pvalue    p.adjust
## GO:2000027     128       0.5486568 2.300298 0.0001673360 0.002058355
## GO:0045216     198       0.5054890 2.244662 0.0001613944 0.002058355
## GO:0032963      91       0.5598317 2.232985 0.0001708526 0.002058355
## GO:0060688      50       0.6217732 2.223879 0.0001768347 0.002058355
## GO:0030199      59       0.6011415 2.219806 0.0001755618 0.002058355
##                qvalues rank                   leading_edge
## GO:2000027 0.001201235 3423 tags=41%, list=14%, signal=36%
## GO:0045216 0.001201235 4866 tags=41%, list=19%, signal=33%
## GO:0032963 0.001201235 4745 tags=48%, list=19%, signal=39%
## GO:0060688 0.001201235 3588 tags=52%, list=14%, signal=45%
## GO:0030199 0.001201235 4258 tags=56%, list=17%, signal=47%
##                                                                                                                                                                                                                                                                                                                                                                                                                                           core_enrichment
## GO:2000027                                                                                                                                                  4070/9863/7481/6092/6091/9620/3082/2246/57216/6591/144165/57664/7422/2719/4920/1601/9241/367/219736/7474/2252/1855/128272/59277/166336/2535/7476/2239/64750/6662/80320/23213/10252/652/6423/9500/1952/6422/655/7490/7472/79864/10818/8324/30812/8321/27031/26585/6495/57154/55727/22881/10082
## GO:0045216 9076/1525/153562/1364/1366/1365/7481/9073/79778/1829/100506658/6714/1436/64065/6591/928/8650/7422/8443/50848/5584/1500/7042/143098/1009/1948/57662/999/80139/85302/5872/1495/5317/10207/7043/8777/7122/9253/55243/4641/286676/1832/23396/222256/7414/4771/6833/91862/421/57555/7368/7082/29841/2706/8502/654/79191/2697/3728/3553/5010/23136/8642/79629/5318/187/79987/9231/4867/1003/10979/389/5657/83700/1894/6722/402/24146/83660/1501/1000
## GO:0032963                                                                                                                                                                                                         55214/55323/4327/79148/4239/54206/4323/5159/1514/1289/7043/5045/11117/1471/90993/2191/9902/7349/57333/4320/1278/871/652/1277/1513/861/4314/8510/114757/8751/9697/4880/4318/55512/4312/51430/4313/5653/63894/5657/10893/23371/5467/5645
## GO:0060688                                                                                                                                                                                                                                                                                                          4070/3082/6591/7422/9241/9175/367/7474/2252/59277/2263/6662/4192/23213/652/9500/120892/6422/655/2099/7472/30812/26585/6495/55727/6474
## GO:0030199                                                                                                                                                                                                                                                                   2331/81792/7042/4015/1281/4921/1290/30008/1289/1805/11117/10491/60681/1303/165/4060/84171/4320/1278/871/6423/1277/4017/84695/7837/1311/4016/26585/1545/50509/63894/7092/7373
expanlimpreerrPAgsea[1:5,11]
## [1] "4070/9863/7481/6092/6091/9620/3082/2246/57216/6591/144165/57664/7422/2719/4920/1601/9241/367/219736/7474/2252/1855/128272/59277/166336/2535/7476/2239/64750/6662/80320/23213/10252/652/6423/9500/1952/6422/655/7490/7472/79864/10818/8324/30812/8321/27031/26585/6495/57154/55727/22881/10082"                                                                                                                                                 
## [2] "9076/1525/153562/1364/1366/1365/7481/9073/79778/1829/100506658/6714/1436/64065/6591/928/8650/7422/8443/50848/5584/1500/7042/143098/1009/1948/57662/999/80139/85302/5872/1495/5317/10207/7043/8777/7122/9253/55243/4641/286676/1832/23396/222256/7414/4771/6833/91862/421/57555/7368/7082/29841/2706/8502/654/79191/2697/3728/3553/5010/23136/8642/79629/5318/187/79987/9231/4867/1003/10979/389/5657/83700/1894/6722/402/24146/83660/1501/1000"
## [3] "55214/55323/4327/79148/4239/54206/4323/5159/1514/1289/7043/5045/11117/1471/90993/2191/9902/7349/57333/4320/1278/871/652/1277/1513/861/4314/8510/114757/8751/9697/4880/4318/55512/4312/51430/4313/5653/63894/5657/10893/23371/5467/5645"                                                                                                                                                                                                        
## [4] "4070/3082/6591/7422/9241/9175/367/7474/2252/59277/2263/6662/4192/23213/652/9500/120892/6422/655/2099/7472/30812/26585/6495/55727/6474"                                                                                                                                                                                                                                                                                                         
## [5] "2331/81792/7042/4015/1281/4921/1290/30008/1289/1805/11117/10491/60681/1303/165/4060/84171/4320/1278/871/6423/1277/4017/84695/7837/1311/4016/26585/1545/50509/63894/7092/7373"
match(expanlimpreerrPAgsea[1:5,]$core_enrichment, expanlimpreerallEntrez$SYMBOL)
## [1] NA NA NA NA NA

This code chunk performs GSEA on the ttexpanpreer object; an object describing the expression differences between ER+ E tumours and ER+ NE tumours.

TNBC pre-treatment E vs NE samples

sDPpretnbc = sampleDataPatient[c(2:3,6:8,11:12,16,21,25:26),]
expDatpretnbc = expDatpre[,c(2:3,6:8,11:12,16,21,25:26)]

expanpretnbc = sDPpretnbc$expansion  

expanpretnbc = as.matrix(expanpretnbc)
expanpretnbc = ifelse(sDPpretnbc$expansion=="E",1,2)
expanpretnbc = as.matrix(expanpretnbc)
rownames(expanpretnbc) = rownames(sDPpretnbc)
designexpanrownamespretnbc = sort(rownames(expanpretnbc))

designexpanpretnbc = model.matrix(~expanpretnbc)
rownames(designexpanpretnbc) = designexpanrownamespretnbc
colnames(designexpanpretnbc) = c("Intercept", "NEis2")

expanlimpretnbc = lmFit(expDatpretnbc, designexpanpretnbc)
expanlimpretnbc = eBayes(expanlimpretnbc)
ttexpanpretnbc = topTable(expanlimpretnbc, coef=2, adjust.method = "BH", n=nrow(expDatpretnbc))
ttexpanpretnbc[1:10,]
##                logFC    AveExpr         t      P.Value  adj.P.Val         B
## HIST1H2AI  -5.497845 -1.3077502 -9.852600 5.699350e-07 0.01441252 2.5242235
## EBF3        4.838823 -2.3039121  7.272587 1.209106e-05 0.14816497 1.3428140
## LINC01484  -3.864636 -2.1709588 -6.993953 1.757731e-05 0.14816497 1.1711317
## SLC30A3    -3.803372 -1.8459799 -6.509260 3.450151e-05 0.21623945 0.8464402
## AL606807.1 -5.014666 -0.2066464 -6.359590 4.275535e-05 0.21623945 0.7390703
## EDA         3.613056  0.1180454  6.049604 6.731470e-05 0.22556775 0.5053754
## TSSK3       3.939649 -2.5363689  6.016979 7.066257e-05 0.22556775 0.4798655
## FOXJ1       4.967544 -2.2337008  6.010393 7.135962e-05 0.22556775 0.4746943
## AL138720.1 -4.569516 -1.8505591 -5.868717 8.826514e-05 0.24800542 0.3616880
## AC026369.3 -4.402078 -1.9266673 -5.590413 1.351205e-04 0.34169264 0.1296542
ttexpanpretnbc[1:10,]
##                logFC    AveExpr         t      P.Value  adj.P.Val         B
## HIST1H2AI  -5.497845 -1.3077502 -9.852600 5.699350e-07 0.01441252 2.5242235
## EBF3        4.838823 -2.3039121  7.272587 1.209106e-05 0.14816497 1.3428140
## LINC01484  -3.864636 -2.1709588 -6.993953 1.757731e-05 0.14816497 1.1711317
## SLC30A3    -3.803372 -1.8459799 -6.509260 3.450151e-05 0.21623945 0.8464402
## AL606807.1 -5.014666 -0.2066464 -6.359590 4.275535e-05 0.21623945 0.7390703
## EDA         3.613056  0.1180454  6.049604 6.731470e-05 0.22556775 0.5053754
## TSSK3       3.939649 -2.5363689  6.016979 7.066257e-05 0.22556775 0.4798655
## FOXJ1       4.967544 -2.2337008  6.010393 7.135962e-05 0.22556775 0.4746943
## AL138720.1 -4.569516 -1.8505591 -5.868717 8.826514e-05 0.24800542 0.3616880
## AC026369.3 -4.402078 -1.9266673 -5.590413 1.351205e-04 0.34169264 0.1296542
##

ttexpanpretnbcsig=ttexpanpretnbc[ttexpanpretnbc$adj.P.Val<0.05,]
ttexpanpretnbcsig[1:10,]
##               logFC  AveExpr       t     P.Value  adj.P.Val        B
## HIST1H2AI -5.497845 -1.30775 -9.8526 5.69935e-07 0.01441252 2.524224
## NA               NA       NA      NA          NA         NA       NA
## NA.1             NA       NA      NA          NA         NA       NA
## NA.2             NA       NA      NA          NA         NA       NA
## NA.3             NA       NA      NA          NA         NA       NA
## NA.4             NA       NA      NA          NA         NA       NA
## NA.5             NA       NA      NA          NA         NA       NA
## NA.6             NA       NA      NA          NA         NA       NA
## NA.7             NA       NA      NA          NA         NA       NA
## NA.8             NA       NA      NA          NA         NA       NA
rownames(ttexpanpretnbcsig)
## [1] "HIST1H2AI"
match("SLC4A1", rownames(ttexpanpretnbc))
## [1] 83
ttexpanpretnbc[83,]
##           logFC   AveExpr        t    P.Value adj.P.Val         B
## SLC4A1 4.578623 -2.445839 3.923828 0.00216389 0.4236858 -1.555428
match("CD163L1", rownames(ttexpanpretnbc))
## [1] 13900
ttexpanpretnbc[13900,]
##              logFC  AveExpr          t   P.Value adj.P.Val         B
## CD163L1 -0.8979793 1.481341 -0.7720382 0.4555772 0.8287393 -5.153734
match("AL031733.2", rownames(ttexpanpretnbc))
## [1] 542
ttexpanpretnbc[542,]
##               logFC   AveExpr        t    P.Value adj.P.Val         B
## AL031733.2 2.749687 -3.443441 2.747111 0.01822556 0.4236858 -3.012026
match("CREG1", rownames(ttexpanpretnbc))
## [1] 24360
ttexpanpretnbc[24360,]
##            logFC  AveExpr          t   P.Value adj.P.Val         B
## CREG1 0.02392132 5.954767 0.05454233 0.9574338 0.9938808 -5.404491
match("CD247", rownames(ttexpanpretnbc))
## [1] 12558
ttexpanpretnbc[12558,]
##            logFC  AveExpr         t  P.Value adj.P.Val         B
## CD247 -0.7494689 5.879397 -0.896188 0.388424 0.7821679 -5.069613
topgene = match(rownames(ttexpanpretnbc)[1], rownames( expDatpretnbc))
t.test (expDatpretnbc[topgene,]~designexpanpretnbc[,2])
## 
##  Welch Two Sample t-test
## 
## data:  expDatpretnbc[topgene, ] by designexpanpretnbc[, 2]
## t = 10.267, df = 8.8823, p-value = 3.18e-06
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  4.283988 6.711702
## sample estimates:
## mean in group 1 mean in group 2 
##        1.691074       -3.806770
expDatpretnbc[18346,]
## T_BIOKEY_10_Pre T_BIOKEY_11_Pre T_BIOKEY_14_Pre T_BIOKEY_15_Pre T_BIOKEY_16_Pre 
##       1.3528008       1.3422312      -4.3294198       0.9451816       1.9593753 
## T_BIOKEY_19_Pre  T_BIOKEY_2_Pre T_BIOKEY_26_Pre T_BIOKEY_31_Pre  T_BIOKEY_8_Pre 
##      -4.8881896      -2.8683144      -4.1257380       2.8557819      -2.2277830 
##  T_BIOKEY_9_Pre 
##      -4.4011782
ttexpanpretnbc[1,]
##               logFC  AveExpr       t     P.Value  adj.P.Val        B
## HIST1H2AI -5.497845 -1.30775 -9.8526 5.69935e-07 0.01441252 2.524224

In this analysis I also used pre-treatment tumour samples, now comparing TNBC E vs TNBC NE. I was hoping that there may be some overlap in the results between this analysis and the previous analysis but this was not the case.

From this we can see that group 1 (E) had a higher mean expression than group 2 (NE). Because the logFC is -5, this means that the logFC function is showing the relative FC of group 2 compared to group 1, meaning that a negative logFC means that group 1 is higher than group 2.

GSEA tnbc pre E vs NE

ttexpanpretnbcnames = rownames(ttexpanpretnbc[1:200,])


##

expanlimpretnbcstats = expanlimpretnbc$t[,2]
expanlimpretnbcstats %>% sort(., decreasing=TRUE) %>% head()
##     EBF3      EDA    TSSK3    FOXJ1    GPR20    NXPH3 
## 7.272587 6.049604 6.016979 6.010393 5.449755 5.251460
expanlimpretnbcallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpretnbcstats), keytype="SYMBOL", columns="ENTREZID")

names(expanlimpretnbcstats) = expanlimpretnbcallEntrez$ENTREZID[ match(names(expanlimpretnbcstats), expanlimpretnbcallEntrez$SYMBOL)]
 

expanlimpretnbcstats %>% sort(., decreasing=TRUE) %>% head()
##   253738     1896    81629     2302     2843    11248 
## 7.272587 6.049604 6.016979 6.010393 5.449755 5.251460
expanlimpretnbcstats_sort = sort(expanlimpretnbcstats, decreasing=TRUE)
 
expanlimpretnbcrPAgsea = gseGO(expanlimpretnbcstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (16.41% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpretnbcrPAgsea[1:5,] %>% knitr::kable()
ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank leading_edge core_enrichment
GO:0050871 GO:0050871 positive regulation of B cell activation 144 -0.4979054 -2.650106 0.0012547 0.0131793 0.0095665 4523 tags=44%, list=18%, signal=36% 28439/79155/8741/28391/28638/28426/28432/3493/28414/10451/102723170/28454/28395/10673/30009/7494/28444/28392/55795/4292/100/28461/28455/100423062/28394/3574/148022/57289/56941/952/939/2048/28434/51111/28473/28412/3503/7293/28452/3538/64127/3500/7292/28467/7037/3539/28396/28400/28442/28408/3501/84787/28429/28388/28449/3514/79915/3502/28385/3507/28386/28420/59067
GO:0002228 GO:0002228 natural killer cell mediated immunity 75 -0.5479287 -2.604583 0.0007042 0.0131793 0.0095665 7234 tags=69%, list=29%, signal=50% 1054/4068/5817/5777/387837/5272/56253/7409/634/3133/114836/6777/11126/2214/203068/23207/3952/10125/201294/146850/259197/3823/1130/57823/3824/3105/22914/55207/10225/6845/100507436/5052/2671/10666/9437/4818/127544/5873/3965/3135/3106/3822/3134/3821/3592/84868/116449/3805/8302/3902/3002/59067
GO:0002715 GO:0002715 regulation of natural killer cell mediated immunity 50 -0.5915431 -2.601694 0.0005429 0.0131793 0.0095665 7234 tags=74%, list=29%, signal=53% 4068/5817/387837/5272/56253/7409/634/3133/114836/6777/11126/3952/10125/146850/259197/3823/3824/3105/22914/10225/100507436/2671/10666/9437/3965/3135/3106/3822/3134/3821/3592/84868/116449/3805/8302/3902/59067
GO:0099024 GO:0099024 plasma membrane invagination 129 -0.4980004 -2.597652 0.0011403 0.0131793 0.0095665 4364 tags=41%, list=17%, signal=34% 28391/28638/28426/28432/3493/28414/102723170/28454/28395/6845/2209/28444/4481/28392/718/28461/8685/28455/100423062/51429/28394/94134/246/57289/1182/9212/54209/3684/28434/28473/28412/3503/28452/3538/54784/3500/28467/3539/28396/28400/28442/28408/3501/28429/28388/28449/3514/3502/28385/3507/28386/28420/84501
GO:0042267 GO:0042267 natural killer cell mediated cytotoxicity 72 -0.5441619 -2.580646 0.0006780 0.0131793 0.0095665 7371 tags=69%, list=29%, signal=49% 1054/4068/5817/5777/387837/5272/56253/7409/634/3133/114836/6777/11126/2214/203068/23207/3952/10125/201294/146850/259197/3823/1130/57823/3824/3105/22914/55207/6845/100507436/5052/2671/10666/9437/4818/127544/5873/3965/3135/3106/3822/3134/3821/3592/84868/3805/8302/3902/3002/59067

This code chunk performs GSEA on the ttexpanpretnbc object; an object describing the expression differences between TNBC E tumours and ER+ NE tumours.

pre-treatment ER+E vs NE in cancer cells

# incorrect now: expDatoncan = expDatcan[,c(1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61)]

load(file = "Cancercells.R", verbose = T)
## Loading objects:
##   delta2000can
##   brcaDatcancer
##   expDatcan
##   sampleDataPatientcan
##   deltagcan
##   sampleData3can
expDatprecan = expDatcan[,c(2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,43,45,47,49, 51, 53,55,57,59,61)]

delta2000can = delta2000can[,c(1:12,14:15,17,19:21,23:28,30:31)]
sampleDataPatientcan = sampleDataPatientcan[c(1:12,14:15,17,19:21,23:28,30:31),]
deltagcan = deltagcan[,c(1:12,14:15,17,19:21,23:28,30:31)]

expDatprecan = expDatprecan[,c(1:12,14:15,17,19:21,23:28,30:31)]
head(expDatprecan)
##          BIOKEY_1_Pre BIOKEY_10_Pre BIOKEY_11_Pre BIOKEY_12_Pre BIOKEY_13_Pre
## A1BG         5.189745      5.778444     5.3804580      7.837913     5.6578037
## A1BG-AS1    -2.904325      3.237434    -0.8377502      1.824113     0.3086709
## A2M          5.112514      6.360532     3.1922818      2.491418     6.1229425
## A2M-AS1     -2.904325      1.603547    -2.3194404     -1.301041     0.3086709
## A4GALT       3.334154      2.854213    -2.3194404     -1.301041     2.3863003
## AAAS         5.598131      5.000118     4.9243512      5.646514     2.8567338
##          BIOKEY_14_Pre BIOKEY_15_Pre BIOKEY_16_Pre BIOKEY_17_Pre BIOKEY_18_Pre
## A1BG          5.618258      4.889832      3.808299      7.519939     5.9619407
## A1BG-AS1     -1.287484      1.687723     -1.997482      2.272714     3.8049197
## A2M           5.407930      7.001283      5.865341     -1.718659     6.4816015
## A2M-AS1      -1.287484     -3.138754      1.200966     -1.718659     0.3088695
## A4GALT        2.724065      3.576517      4.871761      3.194614     0.3088695
## AAAS          4.331908      4.498659      4.086608      4.206636     0.3088695
##          BIOKEY_19_Pre BIOKEY_2_Pre BIOKEY_21_Pre BIOKEY_22_Pre BIOKEY_24_Pre
## A1BG          4.223311     6.755717      6.951614      6.036091     6.8242042
## A1BG-AS1      1.283304     4.266240      2.999935      2.643389    -0.7336224
## A2M           6.840791    10.010324      5.643961      5.758353     5.4778787
## A2M-AS1      -1.943101    -2.260760     -2.301855      1.530182    -0.7336224
## A4GALT       -1.943101    -2.260760      5.298380     -0.634512    -0.7336224
## AAAS          3.524348     3.781572      4.385602      1.702773     4.3562931
##          BIOKEY_26_Pre BIOKEY_27_Pre BIOKEY_28_Pre BIOKEY_3_Pre BIOKEY_30_Pre
## A1BG          6.479746      2.819079     2.8176785     5.795977      6.626559
## A1BG-AS1      2.642840      1.469418    -0.3561496     4.733205      3.030535
## A2M           8.802318      5.313485     7.3923511     5.024763      2.428164
## A2M-AS1       1.529933     -1.587742    -0.3561496     1.851863      3.177939
## A4GALT        2.088355      4.029231    -0.3561496     1.134656     -1.905051
## AAAS          3.783555      5.010529     5.0527833     1.851863      4.996336
##          BIOKEY_31_Pre BIOKEY_4_Pre BIOKEY_5_Pre BIOKEY_6_Pre BIOKEY_8_Pre
## A1BG          6.923018     5.059528    5.2189175     4.709453     1.062831
## A1BG-AS1      2.595002     2.912328   -0.3545796    -1.405573     4.126984
## A2M           6.664599     3.054358    3.2312428     5.302210     1.062831
## A2M-AS1      -3.357334    -2.008088   -0.3545796    -1.405573     1.062831
## A4GALT        5.923867    -2.008088   -0.3545796    -1.405573     2.781910
## AAAS          4.114240     4.086183   -0.3545796     4.564730     5.477825
##          BIOKEY_9_Pre
## A1BG         5.041679
## A1BG-AS1     1.976117
## A2M          8.587659
## A2M-AS1     -3.568063
## A4GALT       4.871578
## AAAS         4.257223
sDPpreercan = sampleDataPatientcan[c(4,9:10,13:15,17,19:20,22:24),]
expDatpreercan = expDatprecan[,c(4,9:10,13:15,17,19:20,22:24)]

expanpreercan = sDPpreercan$expansion  

expanpreercan = as.matrix(expanpreercan)
expanpreercan = ifelse(sDPpreercan$expansion=="E",1,2)
expanpreercan = as.matrix(expanpreercan)
rownames(expanpreercan) = rownames(sDPpreercan)
designexpanrownamespreercan = sort(rownames(expanpreercan))

designexpanpreercan = model.matrix(~expanpreercan)
rownames(designexpanpreercan) = designexpanrownamespreercan
colnames(designexpanpreercan) = c("Intercept", "NEis2")

expanlimpreercan = lmFit(expDatpreercan, designexpanpreercan)
expanlimpreercan = eBayes(expanlimpreercan)
ttexpanpreercan = topTable(expanlimpreercan, coef=2, adjust.method = "BH", n=nrow(expDatpreercan))

View(ttexpanpreercan[1:33,])



ttexpanpreercansig=ttexpanpreercan[ttexpanpreercan$adj.P.Val<0.05,]
ttexpanpreercansig[1:10,]
##              logFC  AveExpr        t      P.Value  adj.P.Val        B
## STX8      5.081200 4.262730 8.939391 6.878462e-07 0.01011336 5.495693
## TIMM22    5.152573 3.415512 8.819872 7.998544e-07 0.01011336 5.385673
## ARHGEF7   3.962487 3.294395 7.929888 2.583798e-06 0.01883735 4.505027
## MKRN2     3.585640 3.068773 7.673424 3.684436e-06 0.01883735 4.230009
## PARP3     4.200447 3.860108 7.654049 3.785764e-06 0.01883735 4.208828
## BORCS5    4.458877 2.895241 7.486545 4.795736e-06 0.01883735 4.023287
## DHRS12    4.383978 2.839067 7.328885 6.010422e-06 0.01883735 3.844644
## FBXO34    3.931405 2.499637 7.282181 6.430006e-06 0.01883735 3.790968
## DNAJC3-DT 3.369904 2.874785 7.253374 6.704212e-06 0.01883735 3.757687
## SRP14-AS1 3.896434 2.473408 7.067411 8.801096e-06 0.02225621 3.539626
rownames(ttexpanpreercansig)
##  [1] "STX8"      "TIMM22"    "ARHGEF7"   "MKRN2"     "PARP3"     "BORCS5"   
##  [7] "DHRS12"    "FBXO34"    "DNAJC3-DT" "SRP14-AS1" "URGCP"     "ARL14EP"  
## [13] "CD163L1"   "TCF4"      "PIGBOS1"   "HUS1"      "WASHC3"    "MEPCE"    
## [19] "SEPT10"    "KIAA0753"  "C15orf48"  "NUDT15"    "CARMIL1"   "TRPM7"    
## [25] "NAV1"      "UBE2D4"    "ESCO1"     "FKBP9"     "GALT"      "UACA"     
## [31] "IL13RA1"   "IGLV5-45"
match("SLC4A1", rownames(ttexpanpreercan))
## [1] 942
ttexpanpreercan[942,]
##            logFC    AveExpr         t    P.Value adj.P.Val         B
## SLC4A1 -2.053596 -0.9681969 -2.968602 0.01095174 0.1460704 -2.658614
match("CD163L1", rownames(ttexpanpreercan))
## [1] 13
ttexpanpreercan[13,]
##            logFC AveExpr        t      P.Value  adj.P.Val        B
## CD163L1 4.545983 2.96057 6.731923 1.454488e-05 0.02718451 3.131889
match("HIST1H2AI", rownames(ttexpanpreercan))
## [1] 10855
ttexpanpreercan[10855,]
##               logFC    AveExpr         t   P.Value adj.P.Val         B
## HIST1H2AI -2.128142 -0.3650483 -1.706752 0.1118023 0.2604566 -4.673713
topgene = match(rownames(ttexpanpreercan)[1], rownames( expDatpreercan))
t.test (expDatpreercan[topgene,]~designexpanpreercan[,2])
## 
##  Welch Two Sample t-test
## 
## data:  expDatpreercan[topgene, ] by designexpanpreercan[, 2]
## t = -8.8761, df = 3.0804, p-value = 0.002716
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -6.876416 -3.285984
## sample estimates:
## mean in group 1 mean in group 2 
##       0.4518303       5.5330304
topgene
## [1] 13732
expDatpreercan[topgene,]
## BIOKEY_12_Pre BIOKEY_17_Pre BIOKEY_18_Pre BIOKEY_21_Pre BIOKEY_22_Pre 
##     1.4012010     6.6023405     0.3088695     4.8386326     4.4560160 
## BIOKEY_24_Pre BIOKEY_27_Pre  BIOKEY_3_Pre BIOKEY_30_Pre  BIOKEY_4_Pre 
##     5.3912070     6.6896995     4.8779145     5.8536729     5.7091740 
##  BIOKEY_5_Pre  BIOKEY_6_Pre 
##    -0.3545796     5.3786163
expDatpreercan["STX8",]
## BIOKEY_12_Pre BIOKEY_17_Pre BIOKEY_18_Pre BIOKEY_21_Pre BIOKEY_22_Pre 
##     1.4012010     6.6023405     0.3088695     4.8386326     4.4560160 
## BIOKEY_24_Pre BIOKEY_27_Pre  BIOKEY_3_Pre BIOKEY_30_Pre  BIOKEY_4_Pre 
##     5.3912070     6.6896995     4.8779145     5.8536729     5.7091740 
##  BIOKEY_5_Pre  BIOKEY_6_Pre 
##    -0.3545796     5.3786163
ttexpanpreercan[1,]
##       logFC AveExpr        t      P.Value  adj.P.Val        B
## STX8 5.0812 4.26273 8.939391 6.878462e-07 0.01011336 5.495693
designexpanpreercan
##               Intercept NEis2
## BIOKEY_12_Pre         1     1
## BIOKEY_17_Pre         1     2
## BIOKEY_18_Pre         1     1
## BIOKEY_21_Pre         1     2
## BIOKEY_22_Pre         1     2
## BIOKEY_24_Pre         1     2
## BIOKEY_27_Pre         1     2
## BIOKEY_3_Pre          1     2
## BIOKEY_30_Pre         1     2
## BIOKEY_4_Pre          1     2
## BIOKEY_5_Pre          1     1
## BIOKEY_6_Pre          1     2
## attr(,"assign")
## [1] 0 1

From here, group 1 (E) gas a lower expression than group 2 (NE).

GSEA er+ pre E vs NE in cancer cells

ttexpanpreernamescan = rownames(ttexpanpreercan[1:200,])

##

expanlimpreercanstats = expanlimpreercan$t[,2]
expanlimpreercanstats %>% sort(., decreasing=TRUE) %>% head()
##     STX8   TIMM22  ARHGEF7    MKRN2    PARP3   BORCS5 
## 8.939391 8.819872 7.929888 7.673424 7.654049 7.486545
expanlimpreercanallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpreercanstats), keytype="SYMBOL", columns="ENTREZID")

names(expanlimpreercanstats) = expanlimpreercanallEntrez$ENTREZID[ match(names(expanlimpreercanstats), expanlimpreercanallEntrez$SYMBOL)]
 

expanlimpreercanstats %>% sort(., decreasing=TRUE) %>% head()
##     9482    29928     8874    23609    10039   118426 
## 8.939391 8.819872 7.929888 7.673424 7.654049 7.486545
expanlimpreercanstats_sort = sort(expanlimpreercanstats, decreasing=TRUE)
 
expanlimpreercanrPAgsea = gseGO(expanlimpreercanstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (33.75% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpreercanrPAgsea[1:5,] %>% knitr::kable()
ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank leading_edge core_enrichment
GO:0002181 GO:0002181 cytoplasmic translation 142 0.6319338 3.545761 0.0011050 0.0058476 0.0029331 5444 tags=68%, list=22%, signal=54% 56339/8891/10605/10492/8664/4733/6191/6173/6154/51611/7458/2332/6194/6124/170506/6139/51389/6168/6170/8661/6207/6160/8668/6134/92715/200916/6176/1983/1968/6138/6235/6202/6147/6122/80315/6209/4736/9147/5936/6208/8894/6228/6229/1939/10480/6232/7311/6223/6142/6159/6167/6234/3184/6164/55854/6146/6233/1973/6152/6136/6175/6161/3921/11224/120526/26986/6143/6205/6189/6222/132864/6188/6171/23521/6217/6125/6137/6210/6141/1660/6193/6130/6203/6165/6135/89978/6133/6144/9045/1801/8531/6132/6128/6206/6230/1974/25873
GO:0035966 GO:0035966 response to topologically incorrect protein 155 0.6135017 3.473464 0.0012469 0.0058476 0.0029331 4578 tags=50%, list=18%, signal=41% 101928527/51009/91445/871/7057/118424/5611/54788/5771/3338/5295/1388/23197/4287/57414/5610/3304/55432/65992/666/10956/22926/51528/55768/64764/1191/10551/9531/7009/387923/4780/3337/55161/2161/3303/23071/55741/10808/3309/7353/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/153222/9451/23645/9217/10347/1649/79956/11080/80279/10133/55757/3297/92305/51283/4189/1861/7873/27230/3320/3315/54870/10488/84236/89890/26608/55658/79139/84919
GO:0006986 GO:0006986 response to unfolded protein 134 0.6052674 3.377031 0.0010352 0.0058476 0.0029331 4578 tags=51%, list=18%, signal=42% 101928527/51009/871/7057/118424/5611/5771/3338/5295/1388/23197/57414/5610/3304/55432/65992/666/10956/22926/51528/64764/10551/9531/7009/387923/4780/3337/55161/3303/23071/55741/10808/3309/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/153222/9451/23645/9217/10347/1649/79956/11080/80279/10133/3297/92305/51283/4189/7873/27230/3320/3315/54870/10488/84236/89890/26608/79139/84919
GO:0035967 GO:0035967 cellular response to topologically incorrect protein 113 0.6168627 3.319020 0.0009124 0.0058476 0.0029331 4578 tags=51%, list=18%, signal=42% 101928527/51009/91445/54788/5771/5295/1388/4287/57414/5610/3304/55432/65992/666/10956/22926/55768/64764/10551/9531/7009/387923/4780/55161/3303/3309/7353/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/9451/23645/9217/10347/1649/79956/80279/10133/55757/3297/51283/4189/1861/27230/54870/10488/84236/26608/55658/79139/84919
GO:0030968 GO:0030968 endoplasmic reticulum unfolded protein response 73 0.6453638 3.243673 0.0006337 0.0058476 0.0029331 5085 tags=63%, list=20%, signal=50% 101928527/51009/5771/5295/1388/57414/5610/55432/65992/666/10956/22926/64764/10551/387923/4780/55161/3309/2033/23376/9709/55829/148327/10018/1965/7494/90993/27248/9451/23645/9217/10347/1649/79956/80279/51283/4189/27230/54870/10488/26608/79139/84919/10399/30001/595

tnbc pre-treatment E vs NE in cancer cells

sDPpretnbccan = sampleDataPatientcan[c(2:3,6:8,11:12,16,21,25:26),]
expDatpretnbccan = expDatprecan[,c(2:3,6:8,11:12,16,21,25:26)]

expanpretnbccan = sDPpretnbccan$expansion  

expanpretnbccan = as.matrix(expanpretnbccan)
expanpretnbccan = ifelse(sDPpretnbccan$expansion=="E",1,2)
expanpretnbccan = as.matrix(expanpretnbccan)
rownames(expanpretnbccan) = rownames(sDPpretnbccan)
designexpanrownamespretnbccan = sort(rownames(expanpretnbccan))

designexpanpretnbccan = model.matrix(~expanpretnbccan)
rownames(designexpanpretnbccan) = designexpanrownamespretnbccan
colnames(designexpanpretnbccan) = c("Intercept", "NEis2")

expanlimpretnbccan = lmFit(expDatpretnbccan, designexpanpretnbccan)
expanlimpretnbccan = eBayes(expanlimpretnbccan)
ttexpanpretnbccan = topTable(expanlimpretnbccan, coef=2, adjust.method = "BH", n=nrow(expDatpretnbccan))
ttexpanpretnbccan[1:10,]
##                logFC     AveExpr         t      P.Value adj.P.Val         B
## LINC01133  -5.770864  1.32930502 -5.906893 3.427880e-05 0.5206684 -4.538141
## RNF128      5.677530  0.15914761  5.397415 8.577419e-05 0.5206684 -4.541577
## SPIRE2     -4.440320  0.05592086 -5.054427 1.621878e-04 0.5206684 -4.544215
## CXCL13     -6.171653  2.10934256 -4.940096 2.012310e-04 0.5206684 -4.545158
## NXPH3       4.004697 -1.04199544  4.750414 2.888288e-04 0.5206684 -4.546798
## AP001453.2 -4.198955  2.58323242 -4.604544 3.824539e-04 0.5206684 -4.548124
## PHYHD1      5.080545  1.88104398  4.565342 4.125962e-04 0.5206684 -4.548490
## CCNE2      -3.594409  2.46574215 -4.485047 4.822055e-04 0.5206684 -4.549254
## CES3       -4.378754  0.02793643 -4.471602 4.949919e-04 0.5206684 -4.549384
## SLC4A1      4.826934 -0.59350249  4.390418 5.799620e-04 0.5206684 -4.550179
ttexpanpretnbccan[1:10,]
##                logFC     AveExpr         t      P.Value adj.P.Val         B
## LINC01133  -5.770864  1.32930502 -5.906893 3.427880e-05 0.5206684 -4.538141
## RNF128      5.677530  0.15914761  5.397415 8.577419e-05 0.5206684 -4.541577
## SPIRE2     -4.440320  0.05592086 -5.054427 1.621878e-04 0.5206684 -4.544215
## CXCL13     -6.171653  2.10934256 -4.940096 2.012310e-04 0.5206684 -4.545158
## NXPH3       4.004697 -1.04199544  4.750414 2.888288e-04 0.5206684 -4.546798
## AP001453.2 -4.198955  2.58323242 -4.604544 3.824539e-04 0.5206684 -4.548124
## PHYHD1      5.080545  1.88104398  4.565342 4.125962e-04 0.5206684 -4.548490
## CCNE2      -3.594409  2.46574215 -4.485047 4.822055e-04 0.5206684 -4.549254
## CES3       -4.378754  0.02793643 -4.471602 4.949919e-04 0.5206684 -4.549384
## SLC4A1      4.826934 -0.59350249  4.390418 5.799620e-04 0.5206684 -4.550179
ttexpanpretnbccansig=ttexpanpretnbccan[ttexpanpretnbccan$adj.P.Val<0.05,]
ttexpanpretnbccansig[1:10,]
##      logFC AveExpr  t P.Value adj.P.Val  B
## NA      NA      NA NA      NA        NA NA
## NA.1    NA      NA NA      NA        NA NA
## NA.2    NA      NA NA      NA        NA NA
## NA.3    NA      NA NA      NA        NA NA
## NA.4    NA      NA NA      NA        NA NA
## NA.5    NA      NA NA      NA        NA NA
## NA.6    NA      NA NA      NA        NA NA
## NA.7    NA      NA NA      NA        NA NA
## NA.8    NA      NA NA      NA        NA NA
## NA.9    NA      NA NA      NA        NA NA
rownames(ttexpanpretnbccansig)
## character(0)
match("PTN", rownames(ttexpanpretnbccan))
## [1] 26
ttexpanpretnbccan[26,]
##        logFC  AveExpr        t     P.Value adj.P.Val         B
## PTN 4.856942 3.438396 4.005355 0.001239745 0.5206684 -4.554211
match("SLC4A1", rownames(ttexpanpretnbccan))
## [1] 10
ttexpanpretnbccan[10,]
##           logFC    AveExpr        t     P.Value adj.P.Val         B
## SLC4A1 4.826934 -0.5935025 4.390418 0.000579962 0.5206684 -4.550179
match("CD163L1", rownames(ttexpanpretnbccan))
## [1] 17601
ttexpanpretnbccan[17601,]
##              logFC  AveExpr          t   P.Value adj.P.Val         B
## CD163L1 -0.6759196 2.715055 -0.5248446 0.6076904 0.8730681 -4.599233
match("HIST1H2AI", rownames(ttexpanpretnbccan))
## [1] 97
ttexpanpretnbccan[97,]
##               logFC    AveExpr         t    P.Value adj.P.Val         B
## HIST1H2AI -3.577364 -0.3363316 -3.288589 0.00522041 0.5206684 -4.562914
topgene = match(rownames(ttexpanpretnbccan)[1], rownames( expDatpretnbccan))
t.test (expDatpretnbccan[topgene,]~designexpanpretnbccan[,2])
## 
##  Welch Two Sample t-test
## 
## data:  expDatpretnbccan[topgene, ] by designexpanpretnbccan[, 2]
## t = 5.7723, df = 8.8007, p-value = 0.0002924
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  3.501445 8.040282
## sample estimates:
## mean in group 1 mean in group 2 
##        4.477049       -1.293815
topgene
## [1] 8041
expDatpretnbccan[topgene,]
## BIOKEY_10_Pre BIOKEY_11_Pre BIOKEY_14_Pre BIOKEY_15_Pre BIOKEY_16_Pre 
##     6.5472501     2.8044262    -1.2874836     5.2619773     2.8906423 
## BIOKEY_19_Pre  BIOKEY_2_Pre BIOKEY_26_Pre BIOKEY_31_Pre  BIOKEY_8_Pre 
##    -1.9431012    -2.2607596     0.2336873     4.8809486     1.0628306 
##  BIOKEY_9_Pre 
##    -3.5680628
ttexpanpretnbccan[1,]
##               logFC  AveExpr         t     P.Value adj.P.Val         B
## LINC01133 -5.770864 1.329305 -5.906893 3.42788e-05 0.5206684 -4.538141
designexpanpretnbccan
##               Intercept NEis2
## BIOKEY_10_Pre         1     1
## BIOKEY_11_Pre         1     1
## BIOKEY_14_Pre         1     2
## BIOKEY_15_Pre         1     1
## BIOKEY_16_Pre         1     1
## BIOKEY_19_Pre         1     2
## BIOKEY_2_Pre          1     2
## BIOKEY_26_Pre         1     2
## BIOKEY_31_Pre         1     1
## BIOKEY_8_Pre          1     2
## BIOKEY_9_Pre          1     2
## attr(,"assign")
## [1] 0 1

negative logFC: group 1 (E) is higher than group 2 (NE)

GSEA tnbc pre E vs NE in cancer cells

ttexpanpretnbcnamescan = rownames(ttexpanpretnbccan[1:200,])

##

expanlimpretnbccanstats = expanlimpretnbccan$t[,2]
expanlimpretnbccanstats %>% sort(., decreasing=TRUE) %>% head()
##   RNF128    NXPH3   PHYHD1   SLC4A1     CHAD   IL17RB 
## 5.397415 4.750414 4.565342 4.390418 4.380305 4.324675
expanlimpretnbccanallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpretnbccanstats), keytype="SYMBOL", columns="ENTREZID")

names(expanlimpretnbccanstats) = expanlimpretnbccanallEntrez$ENTREZID[ match(names(expanlimpretnbccanstats), expanlimpretnbccanallEntrez$SYMBOL)]
 

expanlimpretnbccanstats %>% sort(., decreasing=TRUE) %>% head()
##    79589    11248   254295     6521     1101    55540 
## 5.397415 4.750414 4.565342 4.390418 4.380305 4.324675
expanlimpretnbccanstats_sort = sort(expanlimpretnbccanstats, decreasing=TRUE)
 
expanlimpretnbccanrPAgsea = gseGO(expanlimpretnbccanstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (23.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpretnbccanrPAgsea[1:5,] %>% knitr::kable()
ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank leading_edge core_enrichment
GO:0006119 GO:0006119 oxidative phosphorylation 117 -0.5622125 -3.185613 0.0010030 0.0091503 0.0054669 5575 tags=58%, list=22%, signal=46% 4728/6391/513/10105/4715/4702/27089/539/51085/10632/7384/51142/1327/55967/4701/7388/1339/4708/522/374291/9167/29796/4710/4714/79736/4694/1329/4726/10476/9551/4711/2395/637/4704/509/57017/7386/84300/2631/201626/4697/170712/7385/4716/4706/1716/400916/4724/26471/1738/983/4713/4709/891/790955/440567/92014/4718/4436/4722/205327/4700/54949/79085/215/6392/84275/246269
GO:0022904 GO:0022904 respiratory electron transport chain 96 -0.5674612 -3.102089 0.0008354 0.0091503 0.0054669 5152 tags=55%, list=20%, signal=44% 4715/4702/27089/83733/51142/1327/55967/4701/7388/144363/1339/4708/374291/9167/6648/29796/4710/11232/2819/4714/4694/1329/4726/4711/637/4704/57017/7386/2110/4697/4716/4706/79751/1716/4724/6390/1738/983/4713/4709/891/790955/440567/92014/4718/4722/2629/8604/4700/54949/6392/2109/246269
GO:0048002 GO:0048002 antigen processing and presentation of peptide antigen 58 -0.6285682 -3.100335 0.0005731 0.0091503 0.0054669 4317 tags=59%, list=17%, signal=49% 1785/811/3115/3105/50856/3112/3109/3133/6892/51752/563/1636/3113/3123/6556/3416/972/29108/54842/567/3122/3117/6890/6891/3111/3108/3106/3127/3107/23457/3119/3134/2207/3135
GO:0033108 GO:0033108 mitochondrial respiratory chain complex assembly 87 -0.5700827 -3.057271 0.0007831 0.0091503 0.0054669 6366 tags=61%, list=25%, signal=46% 54902/57001/6834/25813/29078/4728/84987/29090/4715/4702/51241/100303755/55967/6341/4708/493753/374291/28976/131474/90871/4710/100131801/4714/116228/4694/55471/4711/4704/7386/54539/51204/84300/126328/4716/285521/4706/4724/4713/4709/790955/51287/4718/4722/284184/137682/84233/4700/54949/51079/9131/84275/55863/79072
GO:0010257 GO:0010257 NADH dehydrogenase complex assembly 52 -0.6361512 -3.048276 0.0005559 0.0091503 0.0054669 5575 tags=58%, list=22%, signal=45% 4728/29090/4715/4702/55967/4708/374291/28976/90871/4710/4714/4694/55471/4711/4704/54539/126328/4716/4706/4724/4713/4709/4718/4722/284184/137682/84233/4700/51079/55863

pre-treatment ER+E vs NE in T cells

setwd("/Volumes/archive/cancergeneticslab/MikBlack/JudyJewell/brca-scrnaseq")
load(file = "Tcells.R", verbose = T)
## Loading objects:
##   delta2000tcell
##   brcaDattcell
##   expDattcell
##   sampleDataPatienttcell
##   sampleData3tcell
##   expDattcellpre
##   expDattcellon
colnames(delta2000tcell)
##  [1] "T_BIOKEY1delta"   "T_BIOKEY10delta"  "E_BIOKEY11delta"  "E_BIOKEY12delta" 
##  [5] "E_BIOKEY13delta"  "E_BIOKEY614delta" "E_BIOKEY15delta"  "T_BIOKEY16delta" 
##  [9] "T_BIOKEY17delta"  "T_BIOKEY18delta"  "T_BIOKEY19delta"  "E_BIOKEY2delta"  
## [13] "H_BIOKEY20delta"  "T_BIOKEY21delta"  "T_BIOKEY22delta"  "T_BIOKEY23delta" 
## [17] "E_BIOKEY24delta"  "E_BIOKEY25delta"  "T_BIOKEY26delta"  "E_BIOKEY27delta" 
## [21] "E_BIOKEY28delta"  "E_BIOKEY29delta"  "H_BIOKEY3delta"   "E_BIOKEY30delta" 
## [25] "T_BIOKEY31delta"  "T_BIOKEY4delta"   "E_BIOKEY5delta"   "H_BIOKEY6delta"  
## [29] "E_BIOKEY7delta"   "E_BIOKEY8delta"   "T_BIOKEY9delta"
dim(expDattcell)
## [1] 25288    62
rownames(sampleData3tcell)
##  [1] "BIOKEY_13_Pre" "BIOKEY_10_Pre" "BIOKEY_16_Pre" "BIOKEY_14_Pre"
##  [5] "BIOKEY_19_Pre" "BIOKEY_23_Pre" "BIOKEY_26_Pre" "BIOKEY_28_Pre"
##  [9] "BIOKEY_3_Pre"  "BIOKEY_15_Pre" "BIOKEY_8_Pre"  "BIOKEY_5_Pre" 
## [13] "BIOKEY_30_Pre" "BIOKEY_12_Pre" "BIOKEY_1_Pre"  "BIOKEY_31_Pre"
## [17] "BIOKEY_20_Pre" "BIOKEY_22_Pre" "BIOKEY_25_Pre" "BIOKEY_21_Pre"
## [21] "BIOKEY_29_Pre" "BIOKEY_4_Pre"  "BIOKEY_9_Pre"  "BIOKEY_18_Pre"
## [25] "BIOKEY_11_Pre" "BIOKEY_7_Pre"  "BIOKEY_2_Pre"  "BIOKEY_6_Pre" 
## [29] "BIOKEY_17_Pre" "BIOKEY_27_Pre" "BIOKEY_24_Pre"
colnames(expDattcellon)
##  [1] "BIOKEY_1_On"  "BIOKEY_10_On" "BIOKEY_12_On" "BIOKEY_13_On" "BIOKEY_14_On"
##  [6] "BIOKEY_15_On" "BIOKEY_16_On" "BIOKEY_17_On" "BIOKEY_18_On" "BIOKEY_19_On"
## [11] "BIOKEY_2_On"  "BIOKEY_20_On" "BIOKEY_21_On" "BIOKEY_22_On" "BIOKEY_23_On"
## [16] "BIOKEY_24_On" "BIOKEY_25_On" "BIOKEY_26_On" "BIOKEY_27_On" "BIOKEY_28_On"
## [21] "BIOKEY_29_On" "BIOKEY_3_On"  "BIOKEY_30_On" "BIOKEY_31_On" "BIOKEY_4_On" 
## [26] "BIOKEY_5_On"  "BIOKEY_6_On"  "BIOKEY_7_On"  "BIOKEY_8_On"  "BIOKEY_9_On"
delta2000tcell = delta2000tcell[,-c(13,16,18,22,29)]
sampleDataPatienttcell = sampleDataPatienttcell[-c(13,16,18,22,29),]
expDattcell = expDattcell[,-c(25:26,31:32,35:36,43:44,57:58)]
expDattcellpre = expDattcellpre[,-c(13,16,18,22,29)]
sampleData3tcell = sampleData3tcell[-c(6,17,19,21,26),]

sDPpreert = sampleDataPatienttcell[c(4,9:10,13:15,17,19:20,22:24),]
expDatpreert = expDattcellpre[,c(4,9:10,13:15,17,19:20,22:24)]

expanpreert = sDPpreert$expansion  

expanpreert = as.matrix(expanpreert)
expanpreert = ifelse(sDPpreert$expansion=="E",1,2)
expanpreert = as.matrix(expanpreert)
rownames(expanpreert) = rownames(sDPpreert)
designexpanrownamespreert = sort(rownames(expanpreert))

designexpanpreert = model.matrix(~expanpreert)
rownames(designexpanpreert) = designexpanrownamespreert
colnames(designexpanpreert) = c("Intercept", "NEis2")

expanlimpreert = lmFit(expDatpreert, designexpanpreert)
expanlimpreert = eBayes(expanlimpreert)
ttexpanpreert = topTable(expanlimpreert, coef=2, adjust.method = "BH", n=nrow(expDatpreert))
ttexpanpreert[1:10,]
##              logFC  AveExpr         t     P.Value adj.P.Val         B
## CCL3L1   -4.194562 2.959868 -3.865538 0.001361138 0.6776934 -4.553874
## IGHV3-48 -4.284575 1.226946 -3.662571 0.002090607 0.6776934 -4.556498
## ADAMTS10  2.899390 2.054158  3.585189 0.002462582 0.6776934 -4.557531
## SMPD3     3.672813 1.710439  3.535756 0.002734150 0.6776934 -4.558200
## ZYG11B    3.136617 3.031014  3.526968 0.002785462 0.6776934 -4.558319
## TYW1B     2.603277 2.530765  3.517660 0.002840869 0.6776934 -4.558446
## CD9       2.674077 4.797037  3.511949 0.002875405 0.6776934 -4.558524
## FAF1     -2.961785 2.784412 -3.378501 0.003812843 0.6776934 -4.560373
## ETV1     -3.142633 1.692693 -3.356584 0.003993530 0.6776934 -4.560682
## N4BP3     2.777218 3.124552  3.349226 0.004056079 0.6776934 -4.560786
ttexpanpreert[1:10,]
##              logFC  AveExpr         t     P.Value adj.P.Val         B
## CCL3L1   -4.194562 2.959868 -3.865538 0.001361138 0.6776934 -4.553874
## IGHV3-48 -4.284575 1.226946 -3.662571 0.002090607 0.6776934 -4.556498
## ADAMTS10  2.899390 2.054158  3.585189 0.002462582 0.6776934 -4.557531
## SMPD3     3.672813 1.710439  3.535756 0.002734150 0.6776934 -4.558200
## ZYG11B    3.136617 3.031014  3.526968 0.002785462 0.6776934 -4.558319
## TYW1B     2.603277 2.530765  3.517660 0.002840869 0.6776934 -4.558446
## CD9       2.674077 4.797037  3.511949 0.002875405 0.6776934 -4.558524
## FAF1     -2.961785 2.784412 -3.378501 0.003812843 0.6776934 -4.560373
## ETV1     -3.142633 1.692693 -3.356584 0.003993530 0.6776934 -4.560682
## N4BP3     2.777218 3.124552  3.349226 0.004056079 0.6776934 -4.560786
ttexpanpreertsig=ttexpanpreert[ttexpanpreert$adj.P.Val<0.05,]
ttexpanpreertsig[1:10,]
##      logFC AveExpr  t P.Value adj.P.Val  B
## NA      NA      NA NA      NA        NA NA
## NA.1    NA      NA NA      NA        NA NA
## NA.2    NA      NA NA      NA        NA NA
## NA.3    NA      NA NA      NA        NA NA
## NA.4    NA      NA NA      NA        NA NA
## NA.5    NA      NA NA      NA        NA NA
## NA.6    NA      NA NA      NA        NA NA
## NA.7    NA      NA NA      NA        NA NA
## NA.8    NA      NA NA      NA        NA NA
## NA.9    NA      NA NA      NA        NA NA
rownames(ttexpanpreertsig)
## character(0)
match("IQSEC2", rownames(ttexpanpreert))
## [1] 3254
ttexpanpreert[3254,]
##           logFC   AveExpr        t   P.Value adj.P.Val         B
## IQSEC2 1.252203 -0.105018 1.291257 0.2148901 0.6776934 -4.591703
match("PDCD1", rownames(ttexpanpreert))
## [1] 1361
ttexpanpreert[1361,]
##           logFC  AveExpr         t   P.Value adj.P.Val         B
## PDCD1 -1.859253 5.753456 -1.614609 0.1258668 0.6776934 -4.587265
match("CD163L1", rownames(ttexpanpreert))
## [1] 374
ttexpanpreert[374,]
##            logFC   AveExpr        t    P.Value adj.P.Val         B
## CD163L1 1.894626 0.3767991 2.073418 0.05457495 0.6776934 -4.580301
match("LINC01133", rownames(ttexpanpreert))
## [1] 7469
ttexpanpreert[7469,]
##               logFC    AveExpr         t   P.Value adj.P.Val         B
## LINC01133 0.9811407 -0.3083151 0.8518209 0.4068358 0.6776934 -4.596647
match("HIST1H2AI", rownames(ttexpanpreert))
## [1] 2503
ttexpanpreert[2503,]
##              logFC    AveExpr        t   P.Value adj.P.Val         B
## HIST1H2AI 1.313073 -0.0593656 1.388782 0.1838579 0.6776934 -4.590422
topgene = match(rownames(ttexpanpreert)[1], rownames( expDattcellpre))
t.test (expDatpreert[topgene,]~designexpanpreert[,2])
## 
##  Welch Two Sample t-test
## 
## data:  expDatpreert[topgene, ] by designexpanpreert[, 2]
## t = 5.4166, df = 9.2394, p-value = 0.0003871
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  2.449665 5.939460
## sample estimates:
## mean in group 1 mean in group 2 
##        6.105790        1.911228
topgene
## [1] 3093
expDatpreert[topgene,]
## BIOKEY_12_Pre BIOKEY_17_Pre BIOKEY_18_Pre BIOKEY_21_Pre BIOKEY_22_Pre 
##     6.4918874     0.3204857     5.2385971     0.9197805     1.3308503 
## BIOKEY_24_Pre BIOKEY_27_Pre  BIOKEY_3_Pre BIOKEY_30_Pre  BIOKEY_4_Pre 
##     4.3355261     4.8155313    -0.1254166     2.3972571     3.4542531 
##  BIOKEY_5_Pre  BIOKEY_6_Pre 
##     6.5868855    -0.2472196
ttexpanpreert[1,]
##            logFC  AveExpr         t     P.Value adj.P.Val         B
## CCL3L1 -4.194562 2.959868 -3.865538 0.001361138 0.6776934 -4.553874
designexpanpreert
##               Intercept NEis2
## BIOKEY_12_Pre         1     1
## BIOKEY_17_Pre         1     2
## BIOKEY_18_Pre         1     1
## BIOKEY_21_Pre         1     2
## BIOKEY_22_Pre         1     2
## BIOKEY_24_Pre         1     2
## BIOKEY_27_Pre         1     2
## BIOKEY_3_Pre          1     2
## BIOKEY_30_Pre         1     2
## BIOKEY_4_Pre          1     2
## BIOKEY_5_Pre          1     1
## BIOKEY_6_Pre          1     2
## attr(,"assign")
## [1] 0 1

Positive logFC: group 1 (E) has a lower expression than group 2 (NE)

GSEA er+ pre E vs NE in T cells

ttexpanpreernamest = rownames(ttexpanpreert[1:200,])

##

expanlimpreertstats = expanlimpreert$t[,2]
expanlimpreertstats %>% sort(., decreasing=TRUE) %>% head()
## ADAMTS10    SMPD3   ZYG11B    TYW1B      CD9    N4BP3 
## 3.585189 3.535756 3.526968 3.517660 3.511949 3.349226
expanlimpreertallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpreertstats), keytype="SYMBOL", columns="ENTREZID")

names(expanlimpreertstats) = expanlimpreertallEntrez$ENTREZID[ match(names(expanlimpreertstats), expanlimpreertallEntrez$SYMBOL)]
 
expanlimpreertstats %>% sort(., decreasing=TRUE) %>% head()
##    81794    55512    79699   441250      928    23138 
## 3.585189 3.535756 3.526968 3.517660 3.511949 3.349226
expanlimpreertstats_sort = sort(expanlimpreertstats, decreasing=TRUE)
 
expanlimpreertrPAgsea = gseGO(expanlimpreertstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (36.38% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpreertrPAgsea[1:5,] %>% knitr::kable()
ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank leading_edge core_enrichment
GO:0007416 GO:0007416 synapse assembly 171 0.4356335 2.372424 0.0001062 0.0023172 0.0009751 5965 tags=51%, list=24%, signal=40% 5020/266727/7476/84189/10160/22941/6495/2049/7097/64084/575/9231/54413/54549/57463/147381/6092/27253/3556/1948/221935/7855/2258/6712/56123/7058/27445/9423/56122/26167/64101/1946/57502/57717/4131/4897/6386/2239/145581/9379/7474/5789/3084/5818/27067/4915/43/119/577/23284/8927/140689/1000/1141/22943/2048/23768/23767/2555/2561/2562/2596/51738/10082/2823/91156/55203/4038/94030/257194/22871/4916/5021/56126/56125/56132/56131/56130/56127/85358/51804/139065/8128/79953/387804/266977/26280/26045
GO:0051963 GO:0051963 regulation of synapse assembly 97 0.4482314 2.235354 0.0001119 0.0023172 0.0009751 7451 tags=58%, list=29%, signal=41% 5020/266727/7476/84189/10160/6495/2049/7097/64084/575/9231/54413/57463/147381/6092/3556/7058/9423/1946/2239/145581/7474/5789/5818/27067/4915/577/1141/22943/2048/23768/23767/51738/10082/94030/257194/22871/4916/5021/51804/139065/8128/79953/387804/26280/26045/627/869/23316/2895/11141/6585/114798/84631/26050/89780
GO:0050907 GO:0050907 detection of chemical stimulus involved in sensory perception 85 0.4569601 2.225958 0.0001127 0.0023172 0.0009751 12933 tags=93%, list=51%, signal=46% 563/5304/1469/5284/50840/50832/26689/1470/50839/338557/259293/259292/83756/7442/143503/284521/81285/9033/8989/1258/727924/4025/26212/26659/123041/259295/259290/54429/765/79290/401427/4994/390429/50838/259289/81472/390072/283694/135924/80835/83597/2779/390445/390174/259286/1472/259296/390083/4993/390067/132112/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532
GO:0002040 GO:0002040 sprouting angiogenesis 123 0.4199129 2.181518 0.0001090 0.0023172 0.0009751 5787 tags=42%, list=23%, signal=33% 8829/7057/28514/187/161742/5170/6915/3791/57556/90993/196740/10014/1948/57608/2523/7422/161198/652/84830/79812/375056/10769/128240/84870/100506013/5228/7424/9037/144455/185/284/168667/29775/1012/54567/1969/2246/2303/51738/26585/4223/4804/5743/57381/6091/9723/9353/90627/6997/7010/63923/2277
GO:0007156 GO:0007156 homophilic cell adhesion via plasma membrane adhesion molecules 165 0.4024236 2.180987 0.0001065 0.0023172 0.0009751 10390 tags=73%, list=41%, signal=43% 83872/1009/5097/2195/11122/81607/64084/64221/91624/54549/1003/57463/6092/27253/221935/54510/55714/57526/9499/1004/56103/1824/56123/1952/54538/56122/26167/9620/57717/1001/56137/5098/26025/5365/9708/5818/5979/64403/90952/92211/57863/1012/1013/1000/60437/1006/4680/152330/1825/120114/79633/91156/84966/51294/5099/5101/56139/56145/56142/56134/56126/56125/56124/56121/56132/56131/56130/56129/56127/56105/56113/56112/56110/56108/56107/56104/56101/56100/56099/6091/128434/1048/56138/56144/56111/8641/53841/54798/2196/84665/28316/54825/84623/28513/27328/5100/56136/56146/56135/56128/56114/1005/152404/9752/1830/57575/56147/1008/1010/1823/1828/56102/56141/6900/56143/56140/29930/56098/1002/389118

pre-treatment tnbc E vs NE in T cells

sDPpretnbct = sampleDataPatienttcell[c(2:3,6:8,11:12,16,21,25:26),]
expDatpretnbct = expDattcellpre[,c(2:3,6:8,11:12,16,21,25:26)]

expanpretnbct = sDPpretnbct$expansion  

expanpretnbct = as.matrix(expanpretnbct)
expanpretnbct = ifelse(sDPpretnbct$expansion=="E",1,2)
expanpretnbct = as.matrix(expanpretnbct)
rownames(expanpretnbct) = rownames(sDPpretnbct)
designexpanrownamespretnbct = sort(rownames(expanpretnbct))

designexpanpretnbct = model.matrix(~expanpretnbct)
rownames(designexpanpretnbct) = designexpanrownamespretnbct
colnames(designexpanpretnbct) = c("Intercept", "NEis2")

expanlimpretnbct = lmFit(expDatpretnbct, designexpanpretnbct)
expanlimpretnbct = eBayes(expanlimpretnbct)
ttexpanpretnbct = topTable(expanlimpretnbct, coef=2, adjust.method = "BH", n=nrow(expDatpretnbct))
ttexpanpretnbct[1:10,]
##                logFC  AveExpr         t      P.Value adj.P.Val         B
## IL21       -4.337764 2.331703 -7.171387 1.450555e-05 0.2494635 -2.001404
## AL359834.1 -4.066904 1.959833 -6.744352 2.587588e-05 0.2494635 -2.086606
## HAVCR2     -3.688488 4.407431 -6.590269 3.206902e-05 0.2494635 -2.120037
## PDCD1      -2.018335 6.705807 -6.061818 6.856912e-05 0.2494635 -2.246902
## RDH10      -3.704796 1.855264 -5.905851 8.643703e-05 0.2494635 -2.288294
## ZBED2      -4.221219 3.003524 -5.474515 1.669260e-04 0.2494635 -2.413265
## LINC01229  -4.147459 0.748824 -5.376912 1.944379e-04 0.2494635 -2.443831
## GOLIM4     -3.082110 3.021727 -5.281352 2.260572e-04 0.2494635 -2.474625
## SARDH      -2.142950 4.035774 -5.205846 2.548706e-04 0.2494635 -2.499577
## MYO7A      -3.623746 3.343656 -5.042378 3.313702e-04 0.2494635 -2.555533
ttexpanpretnbct[1:10,]
##                logFC  AveExpr         t      P.Value adj.P.Val         B
## IL21       -4.337764 2.331703 -7.171387 1.450555e-05 0.2494635 -2.001404
## AL359834.1 -4.066904 1.959833 -6.744352 2.587588e-05 0.2494635 -2.086606
## HAVCR2     -3.688488 4.407431 -6.590269 3.206902e-05 0.2494635 -2.120037
## PDCD1      -2.018335 6.705807 -6.061818 6.856912e-05 0.2494635 -2.246902
## RDH10      -3.704796 1.855264 -5.905851 8.643703e-05 0.2494635 -2.288294
## ZBED2      -4.221219 3.003524 -5.474515 1.669260e-04 0.2494635 -2.413265
## LINC01229  -4.147459 0.748824 -5.376912 1.944379e-04 0.2494635 -2.443831
## GOLIM4     -3.082110 3.021727 -5.281352 2.260572e-04 0.2494635 -2.474625
## SARDH      -2.142950 4.035774 -5.205846 2.548706e-04 0.2494635 -2.499577
## MYO7A      -3.623746 3.343656 -5.042378 3.313702e-04 0.2494635 -2.555533
ttexpanpretnbctsig=ttexpanpretnbct[ttexpanpretnbct$adj.P.Val<0.05,]
ttexpanpretnbctsig[1:10,]
##      logFC AveExpr  t P.Value adj.P.Val  B
## NA      NA      NA NA      NA        NA NA
## NA.1    NA      NA NA      NA        NA NA
## NA.2    NA      NA NA      NA        NA NA
## NA.3    NA      NA NA      NA        NA NA
## NA.4    NA      NA NA      NA        NA NA
## NA.5    NA      NA NA      NA        NA NA
## NA.6    NA      NA NA      NA        NA NA
## NA.7    NA      NA NA      NA        NA NA
## NA.8    NA      NA NA      NA        NA NA
## NA.9    NA      NA NA      NA        NA NA
rownames(ttexpanpretnbctsig)
## character(0)
match("PTN", rownames(ttexpanpretnbct))
## [1] 346
ttexpanpretnbct[346,]
##        logFC     AveExpr        t    P.Value adj.P.Val         B
## PTN 3.246628 -0.04871996 2.905781 0.01374869 0.2494635 -3.563332
match("IQSEC2", rownames(ttexpanpretnbct))
## [1] 17370
ttexpanpretnbct[17370,]
##             logFC    AveExpr          t   P.Value adj.P.Val         B
## IQSEC2 -0.6762304 -0.3744942 -0.7448736 0.4713396 0.6861507 -4.766041
match("CD163L1", rownames(ttexpanpretnbct))
## [1] 16020
ttexpanpretnbct[16020,]
##             logFC    AveExpr        t   P.Value adj.P.Val         B
## CD163L1 0.8426568 -0.6760042 0.885095 0.3942869 0.6223831 -4.714321
match("LINC01133", rownames(ttexpanpretnbct))
## [1] 25099
ttexpanpretnbct[17370,]
##             logFC    AveExpr          t   P.Value adj.P.Val         B
## IQSEC2 -0.6762304 -0.3744942 -0.7448736 0.4713396 0.6861507 -4.766041
match("HIST1H2AI", rownames(ttexpanpretnbct))
## [1] 11843
ttexpanpretnbccan[11843,]
##              logFC     AveExpr        t   P.Value adj.P.Val         B
## IGLV2-18 -1.758434 -0.04561934 -1.04654 0.3125891 0.6674621 -4.594643

Negative logFC: Group 1 (E) has a higher expression than group 2 (NE)

GSEA tnbc pre E vs NE in T cells

##

expanlimpretnbctstats = expanlimpretnbct$t[,2]
expanlimpretnbctstats %>% sort(., decreasing=TRUE) %>% head()
##     VIPR1  IGLV7-46 ZMIZ1-AS1    ZBTB10     SIK1B      FOSB 
##  4.710116  4.579561  4.393848  4.227258  4.204665  4.200772
expanlimpretnbctallEntrez = AnnotationDbi::select(org.Hs.eg.db, keys=names(expanlimpretnbctstats), keytype="SYMBOL", columns="ENTREZID")

names(expanlimpretnbctstats) = expanlimpretnbctallEntrez$ENTREZID[ match(names(expanlimpretnbctstats), expanlimpretnbctallEntrez$SYMBOL)]
 

expanlimpretnbctstats %>% sort(., decreasing=TRUE) %>% head()
##     7433    28775   283050    65986     <NA>     2354 
## 4.710116 4.579561 4.393848 4.227258 4.204665 4.200772
expanlimpretnbctstats_sort = sort(expanlimpretnbctstats, decreasing=TRUE)
 
expanlimpretnbctrPAgsea = gseGO(expanlimpretnbctstats_sort,OrgDb=org.Hs.eg.db, nPerm=10000,
minGSSize=50, pvalueCutoff=0.05, maxGSSize=200, pAdjustMethod="BH", verbose=FALSE)
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (28.9% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
expanlimpretnbctrPAgsea[1:5,] %>% knitr::kable()
ID Description setSize enrichmentScore NES pvalue p.adjust qvalues rank leading_edge core_enrichment
GO:0050907 GO:0050907 detection of chemical stimulus involved in sensory perception 85 0.5410984 2.158437 0.0001026 0.0201266 0.0134323 8894 tags=79%, list=35%, signal=51% 5284/284521/4994/765/7442/1470/338557/81285/9033/8989/1258/727924/4025/26212/26659/123041/50839/54429/79290/401427/390429/50838/259289/81472/283694/135924/80835/83597/2779/390445/390174/259286/1472/343171/259296/390083/4993/390067/132112/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532
GO:0007608 GO:0007608 sensory perception of smell 71 0.5494861 2.145850 0.0001037 0.0201266 0.0134323 8894 tags=80%, list=35%, signal=52% 284521/4685/79846/4994/2535/158046/81285/1258/727924/29989/26212/26659/123041/79290/401427/390429/81472/6531/341359/127534/1813/283694/1262/135924/100507003/390445/390174/343171/390083/4993/390067/54831/390075/120065/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532
GO:0009593 GO:0009593 detection of chemical stimulus 120 0.4801434 1.975622 0.0001013 0.0201266 0.0134323 8894 tags=68%, list=35%, signal=44% 5284/284521/4994/10333/765/30819/7442/27345/284297/845/1470/338557/2645/4825/81285/9033/8989/1258/727924/4025/26212/26659/123041/50839/54429/79290/401427/390429/22953/50838/259289/81472/846/10242/3170/1755/283694/135924/80835/3651/83597/2779/390445/390174/259286/1472/343171/259296/390083/4993/390067/644168/132112/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532
GO:0007606 GO:0007606 sensory perception of chemical stimulus 127 0.4727709 1.954490 0.0001011 0.0201266 0.0134323 9061 tags=69%, list=36%, signal=44% 5284/284521/4685/79846/4994/765/7442/2535/1470/338557/158046/81285/9033/8989/1258/51764/727924/4025/29989/26212/26659/123041/50839/54429/79290/401427/390429/22953/50838/259289/338398/81472/4852/6531/341359/127534/1813/283694/40/1262/135924/80835/100507003/83597/2779/390445/390174/259286/1472/3933/343171/259296/390083/4993/390067/54831/132112/29850/346562/390075/120065/50834/2780/390882/127623/26246/4995/23538/26664/79473/390275/26539/26188/138881/127069/127064/401992/343172/219429/219447/391109/347468/341416/390326/219865/284532/2774
GO:0007218 GO:0007218 neuropeptide signaling pathway 88 0.4315170 1.727733 0.0001024 0.0201266 0.0134323 8895 tags=65%, list=35%, signal=42% 11251/10316/6653/6751/22986/84109/7301/7349/4886/4878/4922/56413/4935/2861/11248/5368/57537/116/181/2925/2847/4923/5179/8811/64106/23620/4988/5697/114815/4852/4887/5539/10886/4889/6863/9607/3061/2587/124274/4985/10888/128674/347148/6755/796/4829/2837/113091/6866/64111/344758/3062/8620/339403/84539/56923/10887

Venn diagrams

pre-treatment E vs Ne analysis: overlapping ER+ and TNBC (top200)

venn( list( "ER+" = rownames(ttexpanpreer[1:200,]), "TNBC" = rownames(ttexpanpretnbc[1:200,])))

overlap <- intersect(rownames(ttexpanpreer[1:200,]), rownames(ttexpanpretnbc[1:200,]))
cat(paste0(overlap, collapse = "\n"))
## SLC30A3
## MB
## IL21

pre-treatment E vs Ne analysis: overlapping ER+ and TNBC (top500)

venn( list( "ER+" = rownames(ttexpanpreer[1:500,]), "TNBC" = rownames(ttexpanpretnbc[1:500,])))

overlapa5 <- intersect(rownames(ttexpanpreer[1:500,]), rownames(ttexpanpretnbc[1:500,]))
cat(paste0(overlap, collapse = "\n"))
## SLC30A3
## MB
## IL21

pre-treatment E vs Ne analysis: overlapping ER+ and TNBC (top1000)

venn( list( "ER+" = rownames(ttexpanpreer[1:1000,]), "TNBC" = rownames(ttexpanpretnbc[1:1000,])))

overlapa1 <- intersect(rownames(ttexpanpreer[1:1000,]), rownames(ttexpanpretnbc[1:1000,]))
cat(paste0(overlap, collapse = "\n"))
## SLC30A3
## MB
## IL21

pre-treatment E vs Ne analysis in cancer cells: overlapping ER+ and TNBC (top200)

venn( list( "ER+" = rownames(ttexpanpreercan[1:200,]), "TNBC" = rownames(ttexpanpretnbccan[1:200,])))

overlap <- intersect(rownames(ttexpanpreercan[1:200,]), rownames(ttexpanpretnbccan[1:200,]))
cat(paste0(overlap, collapse = "\n"))
## IQSEC2

pre-treatment E vs Ne analysis in cancer cells: overlapping ER+ and TNBC (top500)

venn( list( "ER+" = rownames(ttexpanpreercan[1:500,]), "TNBC" = rownames(ttexpanpretnbccan[1:500,])))

overlapc5 <- intersect(rownames(ttexpanpreercan[1:500,]), rownames(ttexpanpretnbccan[1:500,]))
cat(paste0(overlap, collapse = "\n"))
## IQSEC2

pre-treatment E vs Ne analysis in cancer cells: overlapping ER+ and TNBC (top1000)

venn( list( "ER+" = rownames(ttexpanpreercan[1:1000,]), "TNBC" = rownames(ttexpanpretnbccan[1:1000,])))

overlapc1 <- intersect(rownames(ttexpanpreercan[1:1000,]), rownames(ttexpanpretnbccan[1:1000,]))
cat(paste0(overlap, collapse = "\n"))
## IQSEC2

pre-treatment E vs Ne analysis in T cells: overlapping ER+ and TNBC (top200)

venn( list( "ER+" = rownames(ttexpanpreert[1:200,]), "TNBC" = rownames(ttexpanpretnbct[1:200,])))

overlap <- intersect(rownames(ttexpanpreert[1:200,]), rownames(ttexpanpretnbct[1:200,]))
cat(paste0(overlap, collapse = "\n"))
## IL21
## AKAP5
## CXCL13
## AL357054.4

pre-treatment E vs Ne analysis in T cells: overlapping ER+ and TNBC (top500)

venn( list( "ER+" = rownames(ttexpanpreert[1:500,]), "TNBC" = rownames(ttexpanpretnbct[1:500,])))

overlapt5 <- intersect(rownames(ttexpanpreert[1:500,]), rownames(ttexpanpretnbct[1:500,]))
cat(paste0(overlap, collapse = "\n"))
## IL21
## AKAP5
## CXCL13
## AL357054.4

pre-treatment E vs Ne analysis in T cells: overlapping ER+ and TNBC (top1000)

venn( list( "ER+" = rownames(ttexpanpreert[1:1000,]), "TNBC" = rownames(ttexpanpretnbct[1:1000,])))

overlapt1 <- intersect(rownames(ttexpanpreert[1:1000,]), rownames(ttexpanpretnbct[1:1000,]))
cat(paste0(overlap, collapse = "\n"))
## IL21
## AKAP5
## CXCL13
## AL357054.4

Overlapping top 500 cell populations

par(mar = c(0,0,0,0))
venn( list( "A" = overlapa1, "C" = overlapc1, "T" = overlapt1))
mtext('All cells',2, -1, cex = 1.2, las=3)

overlapat <- intersect(overlapa1, overlapt1)
overlapac <- intersect(overlapa1, overlapc1)
cat(paste0(overlapat, collapse = "\n"))
## SLC30A3
## IL21
## PDXP
## AC017002.3
## KIR2DL4
## PTN
## CXCL13
cat(paste0(overlapac, collapse = "\n"))
## IQSEC2
## MB
## CRB3
## SLC4A1

Plotting expression levels of the PTN gene

barplot(deltag["IQSEC2",], las = 3, col = bctype_cc)

barplot(expDatpre["IQSEC2",], las = 3, col = bctype_cc, margins=c(2,2,2,2))

Plotting IQSEC and other genes

barplot(expDatprecan["IQSEC2",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
abline(h=4.4, col='black', lty=5)
abline(h=3.63, col='black', lty=1)
abline(h=1.13, col='blue', lty=1)
abline(h=0.3, col='blue', lty=5)
abline(h=4.95, col='purple', lty=5)
abline(h=0.95, col='purple', lty=1)

data = as.matrix(expDatprecan["IQSEC2",])

df <- data[order(data,decreasing = F),]
df
## BIOKEY_12_Pre  BIOKEY_5_Pre BIOKEY_18_Pre  BIOKEY_3_Pre  BIOKEY_6_Pre 
##    -1.3010409    -0.3545796     0.3088695     1.1346558     1.4179783 
## BIOKEY_26_Pre BIOKEY_19_Pre BIOKEY_14_Pre  BIOKEY_1_Pre BIOKEY_13_Pre 
##     2.0767493     2.1036044     2.3450765     2.8347727     2.8567338 
##  BIOKEY_8_Pre BIOKEY_24_Pre  BIOKEY_2_Pre  BIOKEY_9_Pre  BIOKEY_4_Pre 
##     3.2178139     3.5800400     3.6175320     3.6374442     3.9794584 
## BIOKEY_27_Pre BIOKEY_21_Pre BIOKEY_22_Pre BIOKEY_16_Pre BIOKEY_10_Pre 
##     4.1522259     4.3407669     4.3768907     4.4016459     4.7572801 
## BIOKEY_30_Pre BIOKEY_28_Pre BIOKEY_11_Pre BIOKEY_17_Pre BIOKEY_15_Pre 
##     4.9135300     4.9708502     5.0338518     5.1864848     5.4364103 
## BIOKEY_31_Pre 
##     5.8043810
expDatpre["IQSEC2",]
##  T_BIOKEY_1_Pre T_BIOKEY_10_Pre T_BIOKEY_11_Pre E_BIOKEY_12_Pre H_BIOKEY_13_Pre 
##       0.8592318       3.9862237       4.0861234       1.2662461       2.0609353 
## T_BIOKEY_14_Pre T_BIOKEY_15_Pre T_BIOKEY_16_Pre E_BIOKEY_17_Pre E_BIOKEY_18_Pre 
##       1.2416261       4.3654499       1.9916973       4.5569787       2.3700212 
## T_BIOKEY_19_Pre  T_BIOKEY_2_Pre E_BIOKEY_21_Pre E_BIOKEY_22_Pre E_BIOKEY_24_Pre 
##       1.8818208       3.3461626       4.1603064       3.8951549       3.6275542 
## T_BIOKEY_26_Pre E_BIOKEY_27_Pre H_BIOKEY_28_Pre  E_BIOKEY_3_Pre E_BIOKEY_30_Pre 
##       1.9205495       2.8262734       3.5882158       3.5993576       4.3830468 
## T_BIOKEY_31_Pre  E_BIOKEY_4_Pre  E_BIOKEY_5_Pre  E_BIOKEY_6_Pre  T_BIOKEY_8_Pre 
##       5.2361344       2.4093407      -3.8991871       1.6153011       2.1517382 
##  T_BIOKEY_9_Pre 
##       3.4817072
#barplot(df, las=3, col=bctype, ylab="Relative expression of CD163L1", cex.lab=1.2, cex.axis=1.2, cex=1.2)
#bctype <- bctype_cc[order(df)]

par(mar = c(14,5,2,2))
barplot(expDatprecan["MB",], las = 3, col = bctype_cc, margins=c(14,2,2,2))

barplot(expDatprecan["CD48",], las = 3, col = bctype_cc)

barplot(expDatprecan["SLC4A1",], las = 3, col = bctype_cc, ylab = "Relative expression of SLC4A1", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext(" Tumour sample", 1, 11, cex=1.2)

par(mar = c(13,5,2,0))
barplot(expDatpre["STX8",], las = 3, col = bctype_cc, ylab = "Relative expression of STX8", cex.lab=1.2, cex.axis=1.2, cex=1.2, ylim = c(-1,6))
mtext(" Tumour sample", 1, 11, cex=1.2)

barplot(expDatprecan["XBP1",], las = 3, col = bctype_cc, ylab = "Relative expression of XBP1", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext(" Tumour sample", 1, 11, cex=1.2)

barplot(expDattcellpre["STX8",], las = 3, col = bctype_cc, ylab = "Relative expression of STX8", cex.lab=1.2, cex.axis=1.2, cex=1.2)
mtext(" Tumour sample", 1, 11, cex=1.2)

#FBXO34
barplot(expDatprecan["FMOD",], las = 3, col = bctype_cc, margins=c(2,2,2,2))

barplot(expDatprecan["OCRL",], las = 3, col = bctype_cc, margins=c(2,2,2,2))

barplot(expDatprecan["CTIF",], las = 3, col = bctype_cc, margins=c(2,2,2,2))

barplot(expDattcellpre["IGHV3-48",], las = 3, col = bctype_cc, margins=c(2,2,2,2))

GOSEQ

preergenes <- ifelse((ttexpanpreer$logFC>2)&(ttexpanpreer$adj.P.Val<0.5),1,0)

names(preergenes) = rownames(ttexpanpreer)

pepwf = nullp(preergenes, "hg19", "geneSymbol")

head(pepwf[1:10,])
##            DEgenes bias.data         pwf
## CD163L1          1      4600 0.001150576
## AC025283.2       1        NA          NA
## KRT17            1      1582 0.001403121
## CDKL2            0      4726 0.001150025
## RPP40            1      1166 0.001438165
## TACSTD2          1      2071 0.001327559
peGO.wall = goseq( pepwf, "hg19", "geneSymbol")
head(peGO.wall)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 17168 GO:0090191            0.0005040954                1.0000000          1
## 85    GO:0000171            0.0005397222                0.9999999          1
## 21162 GO:1905267            0.0007250070                0.9999999          1
## 19017 GO:1900028            0.0016909112                0.9999991          1
## 2660  GO:0005655            0.0017776624                0.9999990          1
## 21194 GO:1905331            0.0018507216                0.9999989          1
##       numInCat
## 17168        2
## 85           2
## 21162        3
## 19017        7
## 2660         7
## 21194        8
##                                                                          term
## 17168 negative regulation of branching involved in ureteric bud morphogenesis
## 85                                                  ribonuclease MRP activity
## 21162                    endonucleolytic cleavage involved in tRNA processing
## 19017                                  negative regulation of ruffle assembly
## 2660                                         nucleolar ribonuclease P complex
## 21194                   negative regulation of morphogenesis of an epithelium
##       ontology
## 17168       BP
## 85          MF
## 21162       BP
## 19017       BP
## 2660        CC
## 21194       BP
peggosig = peGO.wall[peGO.wall$numInCat<500,]
head(peggosig)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 17168 GO:0090191            0.0005040954                1.0000000          1
## 85    GO:0000171            0.0005397222                0.9999999          1
## 21162 GO:1905267            0.0007250070                0.9999999          1
## 19017 GO:1900028            0.0016909112                0.9999991          1
## 2660  GO:0005655            0.0017776624                0.9999990          1
## 21194 GO:1905331            0.0018507216                0.9999989          1
##       numInCat
## 17168        2
## 85           2
## 21162        3
## 19017        7
## 2660         7
## 21194        8
##                                                                          term
## 17168 negative regulation of branching involved in ureteric bud morphogenesis
## 85                                                  ribonuclease MRP activity
## 21162                    endonucleolytic cleavage involved in tRNA processing
## 19017                                  negative regulation of ruffle assembly
## 2660                                         nucleolar ribonuclease P complex
## 21194                   negative regulation of morphogenesis of an epithelium
##       ontology
## 17168       BP
## 85          MF
## 21162       BP
## 19017       BP
## 2660        CC
## 21194       BP
peGO.padj <- as.matrix(p.adjust(peggosig$over_represented_pvalue, method="BH"))
sum(peGO.padj < 0.05)
## [1] 0
head(peGO.padj)
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    1
## [4,]    1
## [5,]    1
## [6,]    1
rownames(peGO.padj) = peggosig$term
colnames(peGO.padj) = "GO term"
head(peGO.padj)
##                                                                         GO term
## negative regulation of branching involved in ureteric bud morphogenesis       1
## ribonuclease MRP activity                                                     1
## endonucleolytic cleavage involved in tRNA processing                          1
## negative regulation of ruffle assembly                                        1
## nucleolar ribonuclease P complex                                              1
## negative regulation of morphogenesis of an epithelium                         1
pretnbcgenes <- ifelse((ttexpanpretnbc$logFC>2) & (ttexpanpretnbc$adj.P.Val<0.5),1,0)


names(pretnbcgenes) = rownames(ttexpanpretnbc)

ptpwf = nullp(pretnbcgenes, "hg19", "geneSymbol")

head(ptpwf[1:10,])
##            DEgenes bias.data        pwf
## HIST1H2AI        0     469.0 0.03161733
## EBF3             1    4377.0 0.04434441
## LINC01484        0        NA         NA
## SLC30A3          0    2100.0 0.03822885
## AL606807.1       0        NA         NA
## EDA              1    1188.5 0.03349868
ptGO.wall = goseq( ptpwf, "hg19", "geneSymbol")
head(ptGO.wall)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 8440  GO:0032501            7.710725e-24                        1        379
## 7705  GO:0031226            4.480386e-21                        1        143
## 2803  GO:0005887            7.050524e-21                        1        138
## 13097 GO:0048731            9.960882e-20                        1        263
## 16531 GO:0071944            5.946207e-18                        1        325
## 3677  GO:0007275            2.622933e-17                        1        275
##       numInCat                                   term ontology
## 8440      6366       multicellular organismal process       BP
## 7705      1590 intrinsic component of plasma membrane       CC
## 2803      1510  integral component of plasma membrane       CC
## 13097     3988                     system development       BP
## 16531     5498                         cell periphery       CC
## 3677      4416     multicellular organism development       BP
ptggosig = ptGO.wall[ptGO.wall$numInCat<500,]
head(ptggosig)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 17923 GO:0098742            9.637264e-14                        1         42
## 15357 GO:0062023            2.276289e-11                        1         47
## 7405  GO:0030545            3.684846e-11                        1         48
## 3811  GO:0008015            8.505493e-11                        1         51
## 7406  GO:0030546            1.799592e-10                        1         44
## 12800 GO:0048018            3.279963e-10                        1         43
##       numInCat                                                      term
## 17923      269 cell-cell adhesion via plasma-membrane adhesion molecules
## 15357      401                  collagen-containing extracellular matrix
## 7405       456                     signaling receptor regulator activity
## 3811       475                                         blood circulation
## 7406       416                     signaling receptor activator activity
## 12800      409                                  receptor ligand activity
##       ontology
## 17923       BP
## 15357       CC
## 7405        MF
## 3811        BP
## 7406        MF
## 12800       MF
ptGO.padj <- as.matrix(p.adjust(ptggosig$over_represented_pvalue, method="BH"))
sum(ptGO.padj < 0.05)
## [1] 145
head(ptGO.padj)
##              [,1]
## [1,] 2.109886e-09
## [2,] 2.491740e-07
## [3,] 2.689078e-07
## [4,] 4.655269e-07
## [5,] 7.879693e-07
## [6,] 1.196804e-06
rownames(ptGO.padj) = ptggosig$term
colnames(ptGO.padj) = "GO term"
head(ptGO.padj)
##                                                                GO term
## cell-cell adhesion via plasma-membrane adhesion molecules 2.109886e-09
## collagen-containing extracellular matrix                  2.491740e-07
## signaling receptor regulator activity                     2.689078e-07
## blood circulation                                         4.655269e-07
## signaling receptor activator activity                     7.879693e-07
## receptor ligand activity                                  1.196804e-06
#ptgosigpadj = ptGO.padj[ptGO.wall$numInCat<500,]
#head(ptgosigpadj)
preercangenes <- ifelse((ttexpanpreercan$logFC>2) & (ttexpanpreercan$adj.P.Val<0.5),1,0)

names(preercangenes) = rownames(ttexpanpreercan)

pecpwf = nullp(preercangenes, "hg19", "geneSymbol")

head(pecpwf[1:10,])
##         DEgenes bias.data        pwf
## STX8          1     975.0 0.06810799
## TIMM22        1    1675.0 0.08843614
## ARHGEF7       1    5393.0 0.09634920
## MKRN2         1    2771.0 0.09563165
## PARP3         1    2366.5 0.09522666
## BORCS5        1        NA         NA
pecGO.wall = goseq( pecpwf, "hg19", "geneSymbol")
head(pecGO.wall)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 2699  GO:0005737            4.464147e-13                        1       1093
## 2648  GO:0005622            5.360956e-12                        1       1292
## 11101 GO:0043227            6.344265e-10                        1       1165
## 20289 GO:1903561            6.887543e-09                        1        241
## 15462 GO:0070062            7.169034e-09                        1        239
## 11104 GO:0043230            7.205187e-09                        1        241
##       numInCat                               term ontology
## 2699     11024                          cytoplasm       CC
## 2648     13540 intracellular anatomical structure       CC
## 11101    12102         membrane-bounded organelle       CC
## 20289     1988              extracellular vesicle       CC
## 15462     1970              extracellular exosome       CC
## 11104     1989            extracellular organelle       CC
pecggosig = pecGO.wall[pecGO.wall$numInCat<500,]
head(pecggosig)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 15357 GO:0062023            5.860226e-08                1.0000000         68
## 8381  GO:0032432            1.078969e-06                0.9999998         21
## 2468  GO:0005201            1.161613e-06                0.9999996         35
## 10041 GO:0035966            1.779983e-06                0.9999994         33
## 9527  GO:0034976            3.257405e-06                0.9999987         45
## 16870 GO:0072659            1.217723e-05                0.9999946         47
##       numInCat                                        term ontology
## 15357      401    collagen-containing extracellular matrix       CC
## 8381        73                       actin filament bundle       CC
## 2468       162 extracellular matrix structural constituent       MF
## 10041      155 response to topologically incorrect protein       BP
## 9527       248    response to endoplasmic reticulum stress       BP
## 16870      276     protein localization to plasma membrane       BP
pecGO.padj <- as.matrix(p.adjust(pecggosig$over_represented_pvalue, method="BH"))
sum(pecGO.padj < 0.05)
## [1] 8
head(pecGO.padj)
##             [,1]
## [1,] 0.001282979
## [2,] 0.008477067
## [3,] 0.008477067
## [4,] 0.009742293
## [5,] 0.014262875
## [6,] 0.044432680
rownames(pecGO.padj) = pecggosig$term
colnames(pecGO.padj) = "GO term"
head(pecGO.padj)
##                                                 GO term
## collagen-containing extracellular matrix    0.001282979
## actin filament bundle                       0.008477067
## extracellular matrix structural constituent 0.008477067
## response to topologically incorrect protein 0.009742293
## response to endoplasmic reticulum stress    0.014262875
## protein localization to plasma membrane     0.044432680
pretnbccangenes <- ifelse((ttexpanpretnbccan$logFC>2) & (ttexpanpretnbccan$adj.P.Val<0.5),1,0)

names(pretnbccangenes) = rownames(ttexpanpretnbccan)

ptcpwf = nullp(pretnbccangenes, "hg19", "geneSymbol")

head(ptcpwf[1:10,])
##            DEgenes bias.data         pwf
## LINC01133        0        NA          NA
## RNF128           0    2763.5 0.001003914
## SPIRE2           0    3259.0 0.001002940
## CXCL13           0    1206.0 0.001004409
## NXPH3            0    5552.0 0.001000005
## AP001453.2       0        NA          NA
ptcGO.wall = goseq( ptcpwf, "hg19", "geneSymbol")
head(ptcGO.wall)
##     category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002                       1                        1          0
## 2 GO:0000003                       1                        1          0
## 3 GO:0000009                       1                        1          0
## 4 GO:0000010                       1                        1          0
## 5 GO:0000012                       1                        1          0
## 6 GO:0000014                       1                        1          0
##   numInCat                                               term ontology
## 1       31                   mitochondrial genome maintenance       BP
## 2     1329                                       reproduction       BP
## 3        2             alpha-1,6-mannosyltransferase activity       MF
## 4        2          trans-hexaprenyltranstransferase activity       MF
## 5       11                         single strand break repair       BP
## 6       10 single-stranded DNA endodeoxyribonuclease activity       MF
ptcggosig = ptcGO.wall[ptcGO.wall$numInCat<500,]
head(ptcggosig)
##     category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002                       1                        1          0
## 3 GO:0000009                       1                        1          0
## 4 GO:0000010                       1                        1          0
## 5 GO:0000012                       1                        1          0
## 6 GO:0000014                       1                        1          0
## 7 GO:0000015                       1                        1          0
##   numInCat                                               term ontology
## 1       31                   mitochondrial genome maintenance       BP
## 3        2             alpha-1,6-mannosyltransferase activity       MF
## 4        2          trans-hexaprenyltranstransferase activity       MF
## 5       11                         single strand break repair       BP
## 6       10 single-stranded DNA endodeoxyribonuclease activity       MF
## 7        4                  phosphopyruvate hydratase complex       CC
ptcGO.padj <- as.matrix(p.adjust(ptcggosig$over_represented_pvalue, method="BH"))
sum(ptcGO.padj < 0.05)
## [1] 0
head(ptcGO.padj)
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    1
## [4,]    1
## [5,]    1
## [6,]    1
rownames(ptcGO.padj) = ptcggosig$term
colnames(ptcGO.padj) = "GO term"
head(ptcGO.padj)
##                                                    GO term
## mitochondrial genome maintenance                         1
## alpha-1,6-mannosyltransferase activity                   1
## trans-hexaprenyltranstransferase activity                1
## single strand break repair                               1
## single-stranded DNA endodeoxyribonuclease activity       1
## phosphopyruvate hydratase complex                        1
preertgenes <- ifelse((ttexpanpreert$logFC>2) & (ttexpanpreert$adj.P.Val<0.5),1,0)

names(preertgenes) = rownames(ttexpanpreert)

petpwf = nullp(preertgenes, "hg19", "geneSymbol")

head(petpwf[1:10,])
##          DEgenes bias.data   pwf
## CCL3L1         0     786.0 0.001
## IGHV3-48       0        NA    NA
## ADAMTS10       0    4263.0 0.001
## SMPD3          0    5276.0 0.001
## ZYG11B         0    8154.0 0.001
## TYW1B          0    2393.5 0.001
petGO.wall = goseq( petpwf, "hg19", "geneSymbol")
head(petGO.wall)
##     category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002                       1                        1          0
## 2 GO:0000003                       1                        1          0
## 3 GO:0000009                       1                        1          0
## 4 GO:0000010                       1                        1          0
## 5 GO:0000012                       1                        1          0
## 6 GO:0000014                       1                        1          0
##   numInCat                                               term ontology
## 1       31                   mitochondrial genome maintenance       BP
## 2     1329                                       reproduction       BP
## 3        2             alpha-1,6-mannosyltransferase activity       MF
## 4        2          trans-hexaprenyltranstransferase activity       MF
## 5       11                         single strand break repair       BP
## 6       10 single-stranded DNA endodeoxyribonuclease activity       MF
petggosig = petGO.wall[petGO.wall$numInCat<500,]
head(petggosig)
##     category over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0000002                       1                        1          0
## 3 GO:0000009                       1                        1          0
## 4 GO:0000010                       1                        1          0
## 5 GO:0000012                       1                        1          0
## 6 GO:0000014                       1                        1          0
## 7 GO:0000015                       1                        1          0
##   numInCat                                               term ontology
## 1       31                   mitochondrial genome maintenance       BP
## 3        2             alpha-1,6-mannosyltransferase activity       MF
## 4        2          trans-hexaprenyltranstransferase activity       MF
## 5       11                         single strand break repair       BP
## 6       10 single-stranded DNA endodeoxyribonuclease activity       MF
## 7        4                  phosphopyruvate hydratase complex       CC
petGO.padj <- as.matrix(p.adjust(petggosig$over_represented_pvalue, method="BH"))
sum(petGO.padj < 0.05)
## [1] 0
head(petGO.padj)
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    1
## [4,]    1
## [5,]    1
## [6,]    1
rownames(petGO.padj) = petggosig$term
colnames(petGO.padj) = "GO term"
head(petGO.padj)
##                                                    GO term
## mitochondrial genome maintenance                         1
## alpha-1,6-mannosyltransferase activity                   1
## trans-hexaprenyltranstransferase activity                1
## single strand break repair                               1
## single-stranded DNA endodeoxyribonuclease activity       1
## phosphopyruvate hydratase complex                        1
pretnbctgenes <- ifelse((ttexpanpretnbct$logFC>2) & (ttexpanpretnbct$adj.P.Val<0.5),1,0)

names(pretnbctgenes) = rownames(ttexpanpretnbct)

pttpwf = nullp(pretnbctgenes, "hg19", "geneSymbol")

head(pttpwf[1:10,])
##            DEgenes bias.data        pwf
## IL21             0       621 0.03829179
## AL359834.1       0        NA         NA
## HAVCR2           0      2327 0.03980521
## PDCD1            0      2116 0.03961800
## RDH10            0      3535 0.04087719
## ZBED2            0      2188 0.03968188
pttGO.wall = goseq(pttpwf, "hg19", "geneSymbol")
head(pttGO.wall)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 16531 GO:0071944            4.508184e-18                        1        340
## 2802  GO:0005886            2.880957e-12                        1        297
## 7303  GO:0030312            2.304307e-10                        1         56
## 2590  GO:0005509            2.443299e-10                        1         65
## 2803  GO:0005887            5.121150e-10                        1        113
## 7597  GO:0031012            5.898720e-10                        1         55
##       numInCat                                  term ontology
## 16531     5498                        cell periphery       CC
## 2802      5049                       plasma membrane       CC
## 7303       533      external encapsulating structure       CC
## 2590       667                   calcium ion binding       MF
## 2803      1510 integral component of plasma membrane       CC
## 7597       532                  extracellular matrix       CC
pttggosig = pttGO.wall[pttGO.wall$numInCat<500,]
head(pttggosig)
##         category over_represented_pvalue under_represented_pvalue numDEInCat
## 15357 GO:0062023            5.387174e-09                1.0000000         44
## 6644  GO:0019814            4.389752e-06                0.9999989         20
## 3594  GO:0007156            9.658510e-06                0.9999972         21
## 2307  GO:0004930            1.064044e-05                0.9999957         36
## 7405  GO:0030545            1.282822e-05                0.9999946         39
## 439   GO:0001525            1.652530e-05                0.9999927         42
##       numInCat                                                            term
## 15357      401                        collagen-containing extracellular matrix
## 6644       151                                          immunoglobulin complex
## 3594       165 homophilic cell adhesion via plasma membrane adhesion molecules
## 2307       400                             G protein-coupled receptor activity
## 7405       456                           signaling receptor regulator activity
## 439        496                                                    angiogenesis
##       ontology
## 15357       CC
## 6644        CC
## 3594        BP
## 2307        MF
## 7405        MF
## 439         BP
pttGO.padj <- as.matrix(p.adjust(pttggosig$over_represented_pvalue, method="BH"))
sum(pttGO.padj < 0.05)
## [1] 2
head(pttGO.padj)
##              [,1]
## [1,] 0.0001179414
## [2,] 0.0480524198
## [3,] 0.0561696487
## [4,] 0.0561696487
## [5,] 0.0561696487
## [6,] 0.0602980769
rownames(pttGO.padj) = pttggosig$term
colnames(pttGO.padj) = "GO term"
head(pttGO.padj)
##                                                                      GO term
## collagen-containing extracellular matrix                        0.0001179414
## immunoglobulin complex                                          0.0480524198
## homophilic cell adhesion via plasma membrane adhesion molecules 0.0561696487
## G protein-coupled receptor activity                             0.0561696487
## signaling receptor regulator activity                           0.0561696487
## angiogenesis                                                    0.0602980769

T CELL analysis (still in progress)

barplot(expDattcellpre["IGLV2-8",], las = 3, col = bctype_cc, margins=c(2,2,2,2))
## Warning in plot.window(xlim, ylim, log = log, ...): "margins" is not a graphical
## parameter
## Warning in axis(if (horiz) 2 else 1, at = at.l, labels = names.arg, lty =
## axis.lty, : "margins" is not a graphical parameter
## Warning in title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...):
## "margins" is not a graphical parameter
## Warning in axis(if (horiz) 1 else 2, cex.axis = cex.axis, ...): "margins" is not
## a graphical parameter